drm/amdkfd: Enable over-subscription with >1 GWS queue
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c (linux-2.6-microblaze.git)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/ratelimit.h>
25 #include <linux/printk.h>
26 #include <linux/slab.h>
27 #include <linux/list.h>
28 #include <linux/types.h>
29 #include <linux/bitops.h>
30 #include <linux/sched.h>
31 #include "kfd_priv.h"
32 #include "kfd_device_queue_manager.h"
33 #include "kfd_mqd_manager.h"
34 #include "cik_regs.h"
35 #include "kfd_kernel_queue.h"
36 #include "amdgpu_amdkfd.h"
37
38 /* Size of the per-pipe EOP queue */
39 #define CIK_HPD_EOP_BYTES_LOG2 11
40 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
41
42 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
43                                         unsigned int pasid, unsigned int vmid);
44
45 static int execute_queues_cpsch(struct device_queue_manager *dqm,
46                                 enum kfd_unmap_queues_filter filter,
47                                 uint32_t filter_param);
48 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
49                                 enum kfd_unmap_queues_filter filter,
50                                 uint32_t filter_param);
51
52 static int map_queues_cpsch(struct device_queue_manager *dqm);
53
54 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
55                                 struct queue *q);
56
57 static inline void deallocate_hqd(struct device_queue_manager *dqm,
58                                 struct queue *q);
59 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
60 static int allocate_sdma_queue(struct device_queue_manager *dqm,
61                                 struct queue *q);
62 static void kfd_process_hw_exception(struct work_struct *work);
63
64 static inline
65 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
66 {
67         if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
68                 return KFD_MQD_TYPE_SDMA;
69         return KFD_MQD_TYPE_CP;
70 }
71
72 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
73 {
74         int i;
75         int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
76                 + pipe * dqm->dev->shared_resources.num_queue_per_pipe;
77
78         /* queue is available for KFD usage if bit is 1 */
79         for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
80                 if (test_bit(pipe_offset + i,
81                               dqm->dev->shared_resources.cp_queue_bitmap))
82                         return true;
83         return false;
84 }
85
86 unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
87 {
88         return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap,
89                                 KGD_MAX_QUEUES);
90 }
91
92 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
93 {
94         return dqm->dev->shared_resources.num_queue_per_pipe;
95 }
96
97 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
98 {
99         return dqm->dev->shared_resources.num_pipe_per_mec;
100 }
101
102 static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
103 {
104         return dqm->dev->device_info->num_sdma_engines;
105 }
106
107 static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
108 {
109         return dqm->dev->device_info->num_xgmi_sdma_engines;
110 }
111
112 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
113 {
114         return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);
115 }
116
117 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
118 {
119         return dqm->dev->device_info->num_sdma_engines
120                         * dqm->dev->device_info->num_sdma_queues_per_engine;
121 }
122
123 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
124 {
125         return dqm->dev->device_info->num_xgmi_sdma_engines
126                         * dqm->dev->device_info->num_sdma_queues_per_engine;
127 }
128
129 void program_sh_mem_settings(struct device_queue_manager *dqm,
130                                         struct qcm_process_device *qpd)
131 {
132         return dqm->dev->kfd2kgd->program_sh_mem_settings(
133                                                 dqm->dev->kgd, qpd->vmid,
134                                                 qpd->sh_mem_config,
135                                                 qpd->sh_mem_ape1_base,
136                                                 qpd->sh_mem_ape1_limit,
137                                                 qpd->sh_mem_bases);
138 }
139
140 void increment_queue_count(struct device_queue_manager *dqm,
141                         enum kfd_queue_type type)
142 {
143         dqm->active_queue_count++;
144         if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
145                 dqm->active_cp_queue_count++;
146 }
147
148 void decrement_queue_count(struct device_queue_manager *dqm,
149                         enum kfd_queue_type type)
150 {
151         dqm->active_queue_count--;
152         if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
153                 dqm->active_cp_queue_count--;
154 }
155
156 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
157 {
158         struct kfd_dev *dev = qpd->dqm->dev;
159
160         if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
161                 /* On pre-SOC15 chips we need to use the queue ID to
162                  * preserve the user mode ABI.
163                  */
164                 q->doorbell_id = q->properties.queue_id;
165         } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
166                         q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
167                 /* For SDMA queues on SOC15 with 8-byte doorbell, use static
168                  * doorbell assignments based on the engine and queue id.
169                  * The doorbell index distance between RLC (2*i) and (2*i+1)
170                  * for an SDMA engine is 512.
171                  */
172                 uint32_t *idx_offset =
173                                 dev->shared_resources.sdma_doorbell_idx;
174
175                 q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
176                         + (q->properties.sdma_queue_id & 1)
177                         * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
178                         + (q->properties.sdma_queue_id >> 1);
179         } else {
180                 /* For CP queues on SOC15 reserve a free doorbell ID */
181                 unsigned int found;
182
183                 found = find_first_zero_bit(qpd->doorbell_bitmap,
184                                             KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
185                 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
186                         pr_debug("No doorbells available\n");
187                         return -EBUSY;
188                 }
189                 set_bit(found, qpd->doorbell_bitmap);
190                 q->doorbell_id = found;
191         }
192
193         q->properties.doorbell_off =
194                 kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
195                                           q->doorbell_id);
196
197         return 0;
198 }
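/*
 * Illustrative sketch, not part of this file: the static SOC15 SDMA
 * doorbell mapping from allocate_doorbell() above, worked through with
 * sample numbers.  The idx_offset[] table and the 512 mirror offset are
 * assumptions for the example (the real values come from
 * dev->shared_resources.sdma_doorbell_idx and the KFD headers).
 */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_DOORBELL_MIRROR_OFFSET 512u	/* hypothetical, per the comment above */

static uint32_t example_sdma_doorbell_id(const uint32_t *idx_offset,
					 uint32_t engine_id, uint32_t queue_id)
{
	/* Even queues land at idx_offset[engine] + queue/2; odd queues are
	 * mirrored 512 doorbells higher, mirroring allocate_doorbell().
	 */
	return idx_offset[engine_id]
		+ (queue_id & 1) * EXAMPLE_DOORBELL_MIRROR_OFFSET
		+ (queue_id >> 1);
}

int main(void)
{
	const uint32_t idx_offset[2] = { 0x00, 0x10 };	/* hypothetical per-engine bases */

	printf("engine 1, queue 0 -> %u\n",
	       example_sdma_doorbell_id(idx_offset, 1, 0));	/* 16 */
	printf("engine 1, queue 1 -> %u\n",
	       example_sdma_doorbell_id(idx_offset, 1, 1));	/* 16 + 512 = 528 */
	printf("engine 1, queue 2 -> %u\n",
	       example_sdma_doorbell_id(idx_offset, 1, 2));	/* 17 */
	return 0;
}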
199
200 static void deallocate_doorbell(struct qcm_process_device *qpd,
201                                 struct queue *q)
202 {
203         unsigned int old;
204         struct kfd_dev *dev = qpd->dqm->dev;
205
206         if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
207             q->properties.type == KFD_QUEUE_TYPE_SDMA ||
208             q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
209                 return;
210
211         old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
212         WARN_ON(!old);
213 }
214
215 static int allocate_vmid(struct device_queue_manager *dqm,
216                         struct qcm_process_device *qpd,
217                         struct queue *q)
218 {
219         int allocated_vmid = -1, i;
220
221         for (i = dqm->dev->vm_info.first_vmid_kfd;
222                         i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
223                 if (!dqm->vmid_pasid[i]) {
224                         allocated_vmid = i;
225                         break;
226                 }
227         }
228
229         if (allocated_vmid < 0) {
230                 pr_err("no more vmid to allocate\n");
231                 return -ENOSPC;
232         }
233
234         pr_debug("vmid allocated: %d\n", allocated_vmid);
235
236         dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
237
238         set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
239
240         qpd->vmid = allocated_vmid;
241         q->properties.vmid = allocated_vmid;
242
243         program_sh_mem_settings(dqm, qpd);
244
245         /* qpd->page_table_base is set earlier when register_process()
246          * is called, i.e. when the first queue is created.
247          */
248         dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
249                         qpd->vmid,
250                         qpd->page_table_base);
251         /* invalidate the VM context after pasid and vmid mapping is set up */
252         kfd_flush_tlb(qpd_to_pdd(qpd));
253
254         if (dqm->dev->kfd2kgd->set_scratch_backing_va)
255                 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
256                                 qpd->sh_hidden_private_base, qpd->vmid);
257
258         return 0;
259 }
260
261 static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
262                                 struct qcm_process_device *qpd)
263 {
264         const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
265         int ret;
266
267         if (!qpd->ib_kaddr)
268                 return -ENOMEM;
269
270         ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
271         if (ret)
272                 return ret;
273
274         return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
275                                 qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
276                                 pmf->release_mem_size / sizeof(uint32_t));
277 }
278
279 static void deallocate_vmid(struct device_queue_manager *dqm,
280                                 struct qcm_process_device *qpd,
281                                 struct queue *q)
282 {
283         /* On GFX v7, CP doesn't flush TC at dequeue */
284         if (q->device->device_info->asic_family == CHIP_HAWAII)
285                 if (flush_texture_cache_nocpsch(q->device, qpd))
286                         pr_err("Failed to flush TC\n");
287
288         kfd_flush_tlb(qpd_to_pdd(qpd));
289
290         /* Release the vmid mapping */
291         set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
292         dqm->vmid_pasid[qpd->vmid] = 0;
293
294         qpd->vmid = 0;
295         q->properties.vmid = 0;
296 }
297
298 static int create_queue_nocpsch(struct device_queue_manager *dqm,
299                                 struct queue *q,
300                                 struct qcm_process_device *qpd)
301 {
302         struct mqd_manager *mqd_mgr;
303         int retval;
304
305         dqm_lock(dqm);
306
307         if (dqm->total_queue_count >= max_num_of_queues_per_device) {
308                 pr_warn("Can't create new usermode queue because %d queues were already created\n",
309                                 dqm->total_queue_count);
310                 retval = -EPERM;
311                 goto out_unlock;
312         }
313
314         if (list_empty(&qpd->queues_list)) {
315                 retval = allocate_vmid(dqm, qpd, q);
316                 if (retval)
317                         goto out_unlock;
318         }
319         q->properties.vmid = qpd->vmid;
320         /*
321          * Eviction state logic: mark all queues as evicted, even ones
322          * not currently active. Restoring inactive queues later only
323          * updates the is_evicted flag but is a no-op otherwise.
324          */
325         q->properties.is_evicted = !!qpd->evicted;
326
327         q->properties.tba_addr = qpd->tba_addr;
328         q->properties.tma_addr = qpd->tma_addr;
329
330         mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
331                         q->properties.type)];
332         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
333                 retval = allocate_hqd(dqm, q);
334                 if (retval)
335                         goto deallocate_vmid;
336                 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
337                         q->pipe, q->queue);
338         } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
339                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
340                 retval = allocate_sdma_queue(dqm, q);
341                 if (retval)
342                         goto deallocate_vmid;
343                 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
344         }
345
346         retval = allocate_doorbell(qpd, q);
347         if (retval)
348                 goto out_deallocate_hqd;
349
350         /* Temporarily release dqm lock to avoid a circular lock dependency */
351         dqm_unlock(dqm);
352         q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
353         dqm_lock(dqm);
354
355         if (!q->mqd_mem_obj) {
356                 retval = -ENOMEM;
357                 goto out_deallocate_doorbell;
358         }
359         mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
360                                 &q->gart_mqd_addr, &q->properties);
361         if (q->properties.is_active) {
362                 if (!dqm->sched_running) {
363                         WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
364                         goto add_queue_to_list;
365                 }
366
367                 if (WARN(q->process->mm != current->mm,
368                                         "should only run in user thread"))
369                         retval = -EFAULT;
370                 else
371                         retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
372                                         q->queue, &q->properties, current->mm);
373                 if (retval)
374                         goto out_free_mqd;
375         }
376
377 add_queue_to_list:
378         list_add(&q->list, &qpd->queues_list);
379         qpd->queue_count++;
380         if (q->properties.is_active)
381                 increment_queue_count(dqm, q->properties.type);
382
383         /*
384          * Unconditionally increment this counter, regardless of the queue's
385          * type or whether the queue is active.
386          */
387         dqm->total_queue_count++;
388         pr_debug("Total of %d queues are accountable so far\n",
389                         dqm->total_queue_count);
390         goto out_unlock;
391
392 out_free_mqd:
393         mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
394 out_deallocate_doorbell:
395         deallocate_doorbell(qpd, q);
396 out_deallocate_hqd:
397         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
398                 deallocate_hqd(dqm, q);
399         else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
400                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
401                 deallocate_sdma_queue(dqm, q);
402 deallocate_vmid:
403         if (list_empty(&qpd->queues_list))
404                 deallocate_vmid(dqm, qpd, q);
405 out_unlock:
406         dqm_unlock(dqm);
407         return retval;
408 }
409
410 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
411 {
412         bool set;
413         int pipe, bit, i;
414
415         set = false;
416
417         for (pipe = dqm->next_pipe_to_allocate, i = 0;
418                         i < get_pipes_per_mec(dqm);
419                         pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
420
421                 if (!is_pipe_enabled(dqm, 0, pipe))
422                         continue;
423
424                 if (dqm->allocated_queues[pipe] != 0) {
425                         bit = ffs(dqm->allocated_queues[pipe]) - 1;
426                         dqm->allocated_queues[pipe] &= ~(1 << bit);
427                         q->pipe = pipe;
428                         q->queue = bit;
429                         set = true;
430                         break;
431                 }
432         }
433
434         if (!set)
435                 return -EBUSY;
436
437         pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
438         /* horizontal hqd allocation */
439         dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
440
441         return 0;
442 }
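/*
 * Illustrative sketch, not part of this file: the round-robin HQD pick
 * used by allocate_hqd() above, on a made-up four-pipe configuration.
 * free_queues[] stands in for dqm->allocated_queues[], where a set bit
 * means the queue slot on that pipe is still free.
 */
#include <stdbool.h>
#include <stdio.h>
#include <strings.h>	/* ffs() */

#define NUM_PIPES 4

static unsigned int free_queues[NUM_PIPES] = { 0x0, 0x6, 0x1, 0xf };
static int next_pipe;

static bool example_allocate_hqd(int *out_pipe, int *out_queue)
{
	int i, pipe;

	for (pipe = next_pipe, i = 0; i < NUM_PIPES;
	     pipe = (pipe + 1) % NUM_PIPES, ++i) {
		if (!free_queues[pipe])
			continue;

		*out_queue = ffs(free_queues[pipe]) - 1;
		*out_pipe = pipe;
		free_queues[pipe] &= ~(1u << *out_queue);
		/* horizontal allocation: the next request starts one pipe over */
		next_pipe = (pipe + 1) % NUM_PIPES;
		return true;
	}
	return false;
}

int main(void)
{
	int p, q, n;

	for (n = 0; n < 4; n++)
		if (example_allocate_hqd(&p, &q))
			printf("allocated pipe %d, queue %d\n", p, q);
	return 0;
}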
443
444 static inline void deallocate_hqd(struct device_queue_manager *dqm,
445                                 struct queue *q)
446 {
447         dqm->allocated_queues[q->pipe] |= (1 << q->queue);
448 }
449
450 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
451  * to avoid unsynchronized access
452  */
453 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
454                                 struct qcm_process_device *qpd,
455                                 struct queue *q)
456 {
457         int retval;
458         struct mqd_manager *mqd_mgr;
459
460         mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
461                         q->properties.type)];
462
463         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
464                 deallocate_hqd(dqm, q);
465         else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
466                 deallocate_sdma_queue(dqm, q);
467         else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
468                 deallocate_sdma_queue(dqm, q);
469         else {
470                 pr_debug("q->properties.type %d is invalid\n",
471                                 q->properties.type);
472                 return -EINVAL;
473         }
474         dqm->total_queue_count--;
475
476         deallocate_doorbell(qpd, q);
477
478         if (!dqm->sched_running) {
479                 WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
480                 return 0;
481         }
482
483         retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
484                                 KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
485                                 KFD_UNMAP_LATENCY_MS,
486                                 q->pipe, q->queue);
487         if (retval == -ETIME)
488                 qpd->reset_wavefronts = true;
489
490         mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
491
492         list_del(&q->list);
493         if (list_empty(&qpd->queues_list)) {
494                 if (qpd->reset_wavefronts) {
495                         pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
496                                         dqm->dev);
497                         /* dbgdev_wave_reset_wavefronts has to be called before
498                          * deallocate_vmid(), i.e. when vmid is still in use.
499                          */
500                         dbgdev_wave_reset_wavefronts(dqm->dev,
501                                         qpd->pqm->process);
502                         qpd->reset_wavefronts = false;
503                 }
504
505                 deallocate_vmid(dqm, qpd, q);
506         }
507         qpd->queue_count--;
508         if (q->properties.is_active) {
509                 decrement_queue_count(dqm, q->properties.type);
510                 if (q->properties.is_gws) {
511                         dqm->gws_queue_count--;
512                         qpd->mapped_gws_queue = false;
513                 }
514         }
515
516         return retval;
517 }
518
519 static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
520                                 struct qcm_process_device *qpd,
521                                 struct queue *q)
522 {
523         int retval;
524
525         dqm_lock(dqm);
526         retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
527         dqm_unlock(dqm);
528
529         return retval;
530 }
531
532 static int update_queue(struct device_queue_manager *dqm, struct queue *q)
533 {
534         int retval = 0;
535         struct mqd_manager *mqd_mgr;
536         struct kfd_process_device *pdd;
537         bool prev_active = false;
538
539         dqm_lock(dqm);
540         pdd = kfd_get_process_device_data(q->device, q->process);
541         if (!pdd) {
542                 retval = -ENODEV;
543                 goto out_unlock;
544         }
545         mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
546                         q->properties.type)];
547
548         /* Save previous activity state for counters */
549         prev_active = q->properties.is_active;
550
551         /* Make sure the queue is unmapped before updating the MQD */
552         if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
553                 retval = unmap_queues_cpsch(dqm,
554                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
555                 if (retval) {
556                         pr_err("unmap queue failed\n");
557                         goto out_unlock;
558                 }
559         } else if (prev_active &&
560                    (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
561                     q->properties.type == KFD_QUEUE_TYPE_SDMA ||
562                     q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
563
564                 if (!dqm->sched_running) {
565                         WARN_ONCE(1, "Update non-HWS queue while stopped\n");
566                         goto out_unlock;
567                 }
568
569                 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
570                                 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
571                                 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
572                 if (retval) {
573                         pr_err("destroy mqd failed\n");
574                         goto out_unlock;
575                 }
576         }
577
578         mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);
579
580         /*
581          * check active state vs. the previous state and modify
582          * counter accordingly. map_queues_cpsch uses the
583          * dqm->active_queue_count to determine whether a new runlist must be
584          * uploaded.
585          */
586         if (q->properties.is_active && !prev_active)
587                 increment_queue_count(dqm, q->properties.type);
588         else if (!q->properties.is_active && prev_active)
589                 decrement_queue_count(dqm, q->properties.type);
590
591         if (q->gws && !q->properties.is_gws) {
592                 if (q->properties.is_active) {
593                         dqm->gws_queue_count++;
594                         pdd->qpd.mapped_gws_queue = true;
595                 }
596                 q->properties.is_gws = true;
597         } else if (!q->gws && q->properties.is_gws) {
598                 if (q->properties.is_active) {
599                         dqm->gws_queue_count--;
600                         pdd->qpd.mapped_gws_queue = false;
601                 }
602                 q->properties.is_gws = false;
603         }
604
605         if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
606                 retval = map_queues_cpsch(dqm);
607         else if (q->properties.is_active &&
608                  (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
609                   q->properties.type == KFD_QUEUE_TYPE_SDMA ||
610                   q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
611                 if (WARN(q->process->mm != current->mm,
612                          "should only run in user thread"))
613                         retval = -EFAULT;
614                 else
615                         retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
616                                                    q->pipe, q->queue,
617                                                    &q->properties, current->mm);
618         }
619
620 out_unlock:
621         dqm_unlock(dqm);
622         return retval;
623 }
624
625 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
626                                         struct qcm_process_device *qpd)
627 {
628         struct queue *q;
629         struct mqd_manager *mqd_mgr;
630         struct kfd_process_device *pdd;
631         int retval, ret = 0;
632
633         dqm_lock(dqm);
634         if (qpd->evicted++ > 0) /* already evicted, do nothing */
635                 goto out;
636
637         pdd = qpd_to_pdd(qpd);
638         pr_info_ratelimited("Evicting PASID 0x%x queues\n",
639                             pdd->process->pasid);
640
641         /* Mark all queues as evicted. Deactivate all active queues on
642          * the qpd.
643          */
644         list_for_each_entry(q, &qpd->queues_list, list) {
645                 q->properties.is_evicted = true;
646                 if (!q->properties.is_active)
647                         continue;
648
649                 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
650                                 q->properties.type)];
651                 q->properties.is_active = false;
652                 decrement_queue_count(dqm, q->properties.type);
653                 if (q->properties.is_gws) {
654                         dqm->gws_queue_count--;
655                         qpd->mapped_gws_queue = false;
656                 }
657
658                 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
659                         continue;
660
661                 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
662                                 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
663                                 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
664                 if (retval && !ret)
665                         /* Return the first error, but keep going to
666                          * maintain a consistent eviction state
667                          */
668                         ret = retval;
669         }
670
671 out:
672         dqm_unlock(dqm);
673         return ret;
674 }
675
676 static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
677                                       struct qcm_process_device *qpd)
678 {
679         struct queue *q;
680         struct kfd_process_device *pdd;
681         int retval = 0;
682
683         dqm_lock(dqm);
684         if (qpd->evicted++ > 0) /* already evicted, do nothing */
685                 goto out;
686
687         pdd = qpd_to_pdd(qpd);
688         pr_info_ratelimited("Evicting PASID 0x%x queues\n",
689                             pdd->process->pasid);
690
691         /* Mark all queues as evicted. Deactivate all active queues on
692          * the qpd.
693          */
694         list_for_each_entry(q, &qpd->queues_list, list) {
695                 q->properties.is_evicted = true;
696                 if (!q->properties.is_active)
697                         continue;
698
699                 q->properties.is_active = false;
700                 decrement_queue_count(dqm, q->properties.type);
701         }
702         retval = execute_queues_cpsch(dqm,
703                                 qpd->is_debug ?
704                                 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
705                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
706
707 out:
708         dqm_unlock(dqm);
709         return retval;
710 }
711
712 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
713                                           struct qcm_process_device *qpd)
714 {
715         struct mm_struct *mm = NULL;
716         struct queue *q;
717         struct mqd_manager *mqd_mgr;
718         struct kfd_process_device *pdd;
719         uint64_t pd_base;
720         int retval, ret = 0;
721
722         pdd = qpd_to_pdd(qpd);
723         /* Retrieve PD base */
724         pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
725
726         dqm_lock(dqm);
727         if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
728                 goto out;
729         if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
730                 qpd->evicted--;
731                 goto out;
732         }
733
734         pr_info_ratelimited("Restoring PASID 0x%x queues\n",
735                             pdd->process->pasid);
736
737         /* Update PD Base in QPD */
738         qpd->page_table_base = pd_base;
739         pr_debug("Updated PD address to 0x%llx\n", pd_base);
740
741         if (!list_empty(&qpd->queues_list)) {
742                 dqm->dev->kfd2kgd->set_vm_context_page_table_base(
743                                 dqm->dev->kgd,
744                                 qpd->vmid,
745                                 qpd->page_table_base);
746                 kfd_flush_tlb(pdd);
747         }
748
749         /* Take a safe reference to the mm_struct, which may otherwise
750          * disappear even while the kfd_process is still referenced.
751          */
752         mm = get_task_mm(pdd->process->lead_thread);
753         if (!mm) {
754                 ret = -EFAULT;
755                 goto out;
756         }
757
758         /* Clear the eviction flags. Activate queues unless they are
759          * inactive for some other reason.
760          */
761         list_for_each_entry(q, &qpd->queues_list, list) {
762                 q->properties.is_evicted = false;
763                 if (!QUEUE_IS_ACTIVE(q->properties))
764                         continue;
765
766                 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
767                                 q->properties.type)];
768                 q->properties.is_active = true;
769                 increment_queue_count(dqm, q->properties.type);
770                 if (q->properties.is_gws) {
771                         dqm->gws_queue_count++;
772                         qpd->mapped_gws_queue = true;
773                 }
774
775                 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
776                         continue;
777
778                 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
779                                        q->queue, &q->properties, mm);
780                 if (retval && !ret)
781                         /* Return the first error, but keep going to
782                          * maintain a consistent eviction state
783                          */
784                         ret = retval;
785         }
786         qpd->evicted = 0;
787 out:
788         if (mm)
789                 mmput(mm);
790         dqm_unlock(dqm);
791         return ret;
792 }
793
794 static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
795                                         struct qcm_process_device *qpd)
796 {
797         struct queue *q;
798         struct kfd_process_device *pdd;
799         uint64_t pd_base;
800         int retval = 0;
801
802         pdd = qpd_to_pdd(qpd);
803         /* Retrieve PD base */
804         pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
805
806         dqm_lock(dqm);
807         if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
808                 goto out;
809         if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
810                 qpd->evicted--;
811                 goto out;
812         }
813
814         pr_info_ratelimited("Restoring PASID 0x%x queues\n",
815                             pdd->process->pasid);
816
817         /* Update PD Base in QPD */
818         qpd->page_table_base = pd_base;
819         pr_debug("Updated PD address to 0x%llx\n", pd_base);
820
821         /* activate all active queues on the qpd */
822         list_for_each_entry(q, &qpd->queues_list, list) {
823                 q->properties.is_evicted = false;
824                 if (!QUEUE_IS_ACTIVE(q->properties))
825                         continue;
826
827                 q->properties.is_active = true;
828                 increment_queue_count(dqm, q->properties.type);
829         }
830         retval = execute_queues_cpsch(dqm,
831                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
832         qpd->evicted = 0;
833 out:
834         dqm_unlock(dqm);
835         return retval;
836 }
837
838 static int register_process(struct device_queue_manager *dqm,
839                                         struct qcm_process_device *qpd)
840 {
841         struct device_process_node *n;
842         struct kfd_process_device *pdd;
843         uint64_t pd_base;
844         int retval;
845
846         n = kzalloc(sizeof(*n), GFP_KERNEL);
847         if (!n)
848                 return -ENOMEM;
849
850         n->qpd = qpd;
851
852         pdd = qpd_to_pdd(qpd);
853         /* Retrieve PD base */
854         pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
855
856         dqm_lock(dqm);
857         list_add(&n->list, &dqm->queues);
858
859         /* Update PD Base in QPD */
860         qpd->page_table_base = pd_base;
861         pr_debug("Updated PD address to 0x%llx\n", pd_base);
862
863         retval = dqm->asic_ops.update_qpd(dqm, qpd);
864
865         dqm->processes_count++;
866
867         dqm_unlock(dqm);
868
869         /* Outside the DQM lock because under the DQM lock we can't do
870          * reclaim or take other locks that others hold while reclaiming.
871          */
872         kfd_inc_compute_active(dqm->dev);
873
874         return retval;
875 }
876
877 static int unregister_process(struct device_queue_manager *dqm,
878                                         struct qcm_process_device *qpd)
879 {
880         int retval;
881         struct device_process_node *cur, *next;
882
883         pr_debug("qpd->queues_list is %s\n",
884                         list_empty(&qpd->queues_list) ? "empty" : "not empty");
885
886         retval = 0;
887         dqm_lock(dqm);
888
889         list_for_each_entry_safe(cur, next, &dqm->queues, list) {
890                 if (qpd == cur->qpd) {
891                         list_del(&cur->list);
892                         kfree(cur);
893                         dqm->processes_count--;
894                         goto out;
895                 }
896         }
897         /* qpd not found in dqm list */
898         retval = 1;
899 out:
900         dqm_unlock(dqm);
901
902         /* Outside the DQM lock because under the DQM lock we can't do
903          * reclaim or take other locks that others hold while reclaiming.
904          */
905         if (!retval)
906                 kfd_dec_compute_active(dqm->dev);
907
908         return retval;
909 }
910
911 static int
912 set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
913                         unsigned int vmid)
914 {
915         return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
916                                                 dqm->dev->kgd, pasid, vmid);
917 }
918
919 static void init_interrupts(struct device_queue_manager *dqm)
920 {
921         unsigned int i;
922
923         for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
924                 if (is_pipe_enabled(dqm, 0, i))
925                         dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
926 }
927
928 static int initialize_nocpsch(struct device_queue_manager *dqm)
929 {
930         int pipe, queue;
931
932         pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
933
934         dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
935                                         sizeof(unsigned int), GFP_KERNEL);
936         if (!dqm->allocated_queues)
937                 return -ENOMEM;
938
939         mutex_init(&dqm->lock_hidden);
940         INIT_LIST_HEAD(&dqm->queues);
941         dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
942         dqm->active_cp_queue_count = 0;
943         dqm->gws_queue_count = 0;
944
945         for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
946                 int pipe_offset = pipe * get_queues_per_pipe(dqm);
947
948                 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
949                         if (test_bit(pipe_offset + queue,
950                                      dqm->dev->shared_resources.cp_queue_bitmap))
951                                 dqm->allocated_queues[pipe] |= 1 << queue;
952         }
953
954         memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
955
956         dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
957         dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
958
959         return 0;
960 }
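/*
 * Illustrative sketch, not part of this file: the "~0ULL >> (64 - n)"
 * idiom used above to seed sdma_bitmap and xgmi_sdma_bitmap with one
 * set bit per available SDMA queue.  It is well defined for
 * 1 <= n <= 64; a shift by 64 (n == 0) would be undefined in C.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t low_bits_set(unsigned int n)
{
	return ~0ULL >> (64 - n);
}

int main(void)
{
	printf("n=8  -> 0x%llx\n", (unsigned long long)low_bits_set(8));	/* 0xff */
	printf("n=16 -> 0x%llx\n", (unsigned long long)low_bits_set(16));	/* 0xffff */
	printf("n=64 -> 0x%llx\n", (unsigned long long)low_bits_set(64));	/* all 64 bits */
	return 0;
}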
961
962 static void uninitialize(struct device_queue_manager *dqm)
963 {
964         int i;
965
966         WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);
967
968         kfree(dqm->allocated_queues);
969         for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
970                 kfree(dqm->mqd_mgrs[i]);
971         mutex_destroy(&dqm->lock_hidden);
972 }
973
974 static int start_nocpsch(struct device_queue_manager *dqm)
975 {
976         pr_info("SW scheduler is used\n");
977         init_interrupts(dqm);
978
979         if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
980                 return pm_init(&dqm->packets, dqm);
981         dqm->sched_running = true;
982
983         return 0;
984 }
985
986 static int stop_nocpsch(struct device_queue_manager *dqm)
987 {
988         if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
989                 pm_uninit(&dqm->packets, false);
990         dqm->sched_running = false;
991
992         return 0;
993 }
994
995 static void pre_reset(struct device_queue_manager *dqm)
996 {
997         dqm_lock(dqm);
998         dqm->is_resetting = true;
999         dqm_unlock(dqm);
1000 }
1001
1002 static int allocate_sdma_queue(struct device_queue_manager *dqm,
1003                                 struct queue *q)
1004 {
1005         int bit;
1006
1007         if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1008                 if (dqm->sdma_bitmap == 0) {
1009                         pr_err("No more SDMA queue to allocate\n");
1010                         return -ENOMEM;
1011                 }
1012
1013                 bit = __ffs64(dqm->sdma_bitmap);
1014                 dqm->sdma_bitmap &= ~(1ULL << bit);
1015                 q->sdma_id = bit;
1016                 q->properties.sdma_engine_id = q->sdma_id %
1017                                 get_num_sdma_engines(dqm);
1018                 q->properties.sdma_queue_id = q->sdma_id /
1019                                 get_num_sdma_engines(dqm);
1020         } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1021                 if (dqm->xgmi_sdma_bitmap == 0) {
1022                         pr_err("No more XGMI SDMA queue to allocate\n");
1023                         return -ENOMEM;
1024                 }
1025                 bit = __ffs64(dqm->xgmi_sdma_bitmap);
1026                 dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
1027                 q->sdma_id = bit;
1028                 /* sdma_engine_id is the SDMA id counting both
1029                  * PCIe-optimized and XGMI-optimized SDMA
1030                  * engines. The calculation below assumes
1031                  * the first N engines are always the
1032                  * PCIe-optimized ones.
1033                  */
1034                 q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
1035                                 q->sdma_id % get_num_xgmi_sdma_engines(dqm);
1036                 q->properties.sdma_queue_id = q->sdma_id /
1037                                 get_num_xgmi_sdma_engines(dqm);
1038         }
1039
1040         pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
1041         pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
1042
1043         return 0;
1044 }
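/*
 * Illustrative sketch, not part of this file: how allocate_sdma_queue()
 * above turns a flat sdma_id into an (engine, queue) pair.  The engine
 * counts below (2 PCIe-optimized, 6 XGMI-optimized) are made-up sample
 * values; the real counts come from device_info.
 */
#include <stdio.h>

#define NUM_SDMA_ENGINES	2	/* hypothetical */
#define NUM_XGMI_SDMA_ENGINES	6	/* hypothetical */

int main(void)
{
	unsigned int id;

	/* PCIe-optimized SDMA: round-robin across engines 0..N-1 */
	for (id = 0; id < 4; id++)
		printf("SDMA id %u -> engine %u, queue %u\n", id,
		       id % NUM_SDMA_ENGINES, id / NUM_SDMA_ENGINES);

	/* XGMI-optimized SDMA: engine numbering starts after the PCIe ones */
	for (id = 0; id < 4; id++)
		printf("XGMI SDMA id %u -> engine %u, queue %u\n", id,
		       NUM_SDMA_ENGINES + id % NUM_XGMI_SDMA_ENGINES,
		       id / NUM_XGMI_SDMA_ENGINES);
	return 0;
}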
1045
1046 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
1047                                 struct queue *q)
1048 {
1049         if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1050                 if (q->sdma_id >= get_num_sdma_queues(dqm))
1051                         return;
1052                 dqm->sdma_bitmap |= (1ULL << q->sdma_id);
1053         } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1054                 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
1055                         return;
1056                 dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
1057         }
1058 }
1059
1060 /*
1061  * Device Queue Manager implementation for cp scheduler
1062  */
1063
1064 static int set_sched_resources(struct device_queue_manager *dqm)
1065 {
1066         int i, mec;
1067         struct scheduling_resources res;
1068
1069         res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
1070
1071         res.queue_mask = 0;
1072         for (i = 0; i < KGD_MAX_QUEUES; ++i) {
1073                 mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
1074                         / dqm->dev->shared_resources.num_pipe_per_mec;
1075
1076                 if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap))
1077                         continue;
1078
1079                 /* only acquire queues from the first MEC */
1080                 if (mec > 0)
1081                         continue;
1082
1083                 /* This situation may be hit in the future if a new HW
1084                  * generation exposes more than 64 queues. If so, the
1085                  * definition of res.queue_mask needs updating
1086                  */
1087                 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
1088                         pr_err("Invalid queue enabled by amdgpu: %d\n", i);
1089                         break;
1090                 }
1091
1092                 res.queue_mask |= (1ull << i);
1093         }
1094         res.gws_mask = ~0ull;
1095         res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
1096
1097         pr_debug("Scheduling resources:\n"
1098                         "vmid mask: 0x%8X\n"
1099                         "queue mask: 0x%8llX\n",
1100                         res.vmid_mask, res.queue_mask);
1101
1102         return pm_send_set_resources(&dqm->packets, &res);
1103 }
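/*
 * Illustrative sketch, not part of this file: building the 64-bit HWS
 * queue_mask the way set_sched_resources() above does, limited to the
 * first MEC.  The pipe/queue geometry and the enabled-queue bitmap are
 * made-up sample values; the real ones come from shared_resources.
 */
#include <stdint.h>
#include <stdio.h>

#define QUEUES_PER_PIPE	8	/* hypothetical */
#define PIPES_PER_MEC	4	/* hypothetical */
#define MAX_QUEUES	64

int main(void)
{
	/* pretend queues 8..31 (pipes 1..3 of MEC0) belong to KFD */
	uint64_t cp_queue_bitmap = 0x00000000ffffff00ULL;
	uint64_t queue_mask = 0;
	int i, mec;

	for (i = 0; i < MAX_QUEUES; ++i) {
		mec = (i / QUEUES_PER_PIPE) / PIPES_PER_MEC;

		if (!(cp_queue_bitmap & (1ULL << i)))
			continue;
		if (mec > 0)	/* only acquire queues from the first MEC */
			continue;
		queue_mask |= 1ULL << i;
	}
	printf("queue_mask = 0x%016llx\n", (unsigned long long)queue_mask);
	return 0;
}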
1104
1105 static int initialize_cpsch(struct device_queue_manager *dqm)
1106 {
1107         pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1108
1109         mutex_init(&dqm->lock_hidden);
1110         INIT_LIST_HEAD(&dqm->queues);
1111         dqm->active_queue_count = dqm->processes_count = 0;
1112         dqm->active_cp_queue_count = 0;
1113         dqm->gws_queue_count = 0;
1114         dqm->active_runlist = false;
1115         dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
1116         dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
1117
1118         INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
1119
1120         return 0;
1121 }
1122
1123 static int start_cpsch(struct device_queue_manager *dqm)
1124 {
1125         int retval;
1126
1127         retval = 0;
1128
1129         retval = pm_init(&dqm->packets, dqm);
1130         if (retval)
1131                 goto fail_packet_manager_init;
1132
1133         retval = set_sched_resources(dqm);
1134         if (retval)
1135                 goto fail_set_sched_resources;
1136
1137         pr_debug("Allocating fence memory\n");
1138
1139         /* allocate fence memory on the gart */
1140         retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
1141                                         &dqm->fence_mem);
1142
1143         if (retval)
1144                 goto fail_allocate_vidmem;
1145
1146         dqm->fence_addr = dqm->fence_mem->cpu_ptr;
1147         dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
1148
1149         init_interrupts(dqm);
1150
1151         dqm_lock(dqm);
1152         /* clear hang status when the driver tries to start the hw scheduler */
1153         dqm->is_hws_hang = false;
1154         dqm->is_resetting = false;
1155         dqm->sched_running = true;
1156         execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1157         dqm_unlock(dqm);
1158
1159         return 0;
1160 fail_allocate_vidmem:
1161 fail_set_sched_resources:
1162         pm_uninit(&dqm->packets, false);
1163 fail_packet_manager_init:
1164         return retval;
1165 }
1166
1167 static int stop_cpsch(struct device_queue_manager *dqm)
1168 {
1169         bool hanging;
1170
1171         dqm_lock(dqm);
1172         if (!dqm->is_hws_hang)
1173                 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1174         hanging = dqm->is_hws_hang || dqm->is_resetting;
1175         dqm->sched_running = false;
1176         dqm_unlock(dqm);
1177
1178         kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
1179         pm_uninit(&dqm->packets, hanging);
1180
1181         return 0;
1182 }
1183
1184 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
1185                                         struct kernel_queue *kq,
1186                                         struct qcm_process_device *qpd)
1187 {
1188         dqm_lock(dqm);
1189         if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1190                 pr_warn("Can't create new kernel queue because %d queues were already created\n",
1191                                 dqm->total_queue_count);
1192                 dqm_unlock(dqm);
1193                 return -EPERM;
1194         }
1195
1196         /*
1197          * Unconditionally increment this counter, regardless of the queue's
1198          * type or whether the queue is active.
1199          */
1200         dqm->total_queue_count++;
1201         pr_debug("Total of %d queues are accountable so far\n",
1202                         dqm->total_queue_count);
1203
1204         list_add(&kq->list, &qpd->priv_queue_list);
1205         increment_queue_count(dqm, kq->queue->properties.type);
1206         qpd->is_debug = true;
1207         execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1208         dqm_unlock(dqm);
1209
1210         return 0;
1211 }
1212
1213 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
1214                                         struct kernel_queue *kq,
1215                                         struct qcm_process_device *qpd)
1216 {
1217         dqm_lock(dqm);
1218         list_del(&kq->list);
1219         decrement_queue_count(dqm, kq->queue->properties.type);
1220         qpd->is_debug = false;
1221         execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1222         /*
1223          * Unconditionally decrement this counter, regardless of the queue's
1224          * type.
1225          */
1226         dqm->total_queue_count--;
1227         pr_debug("Total of %d queues are accountable so far\n",
1228                         dqm->total_queue_count);
1229         dqm_unlock(dqm);
1230 }
1231
1232 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1233                         struct qcm_process_device *qpd)
1234 {
1235         int retval;
1236         struct mqd_manager *mqd_mgr;
1237
1238         if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1239                 pr_warn("Can't create new usermode queue because %d queues were already created\n",
1240                                 dqm->total_queue_count);
1241                 retval = -EPERM;
1242                 goto out;
1243         }
1244
1245         if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1246                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1247                 dqm_lock(dqm);
1248                 retval = allocate_sdma_queue(dqm, q);
1249                 dqm_unlock(dqm);
1250                 if (retval)
1251                         goto out;
1252         }
1253
1254         retval = allocate_doorbell(qpd, q);
1255         if (retval)
1256                 goto out_deallocate_sdma_queue;
1257
1258         mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1259                         q->properties.type)];
1260
1261         if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1262                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1263                 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1264         q->properties.tba_addr = qpd->tba_addr;
1265         q->properties.tma_addr = qpd->tma_addr;
1266         q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
1267         if (!q->mqd_mem_obj) {
1268                 retval = -ENOMEM;
1269                 goto out_deallocate_doorbell;
1270         }
1271
1272         dqm_lock(dqm);
1273         /*
1274          * Eviction state logic: mark all queues as evicted, even ones
1275          * not currently active. Restoring inactive queues later only
1276          * updates the is_evicted flag but is a no-op otherwise.
1277          */
1278         q->properties.is_evicted = !!qpd->evicted;
1279         mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
1280                                 &q->gart_mqd_addr, &q->properties);
1281
1282         list_add(&q->list, &qpd->queues_list);
1283         qpd->queue_count++;
1284
1285         if (q->properties.is_active) {
1286                 increment_queue_count(dqm, q->properties.type);
1287
1288                 retval = execute_queues_cpsch(dqm,
1289                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1290         }
1291
1292         /*
1293          * Unconditionally increment this counter, regardless of the queue's
1294          * type or whether the queue is active.
1295          */
1296         dqm->total_queue_count++;
1297
1298         pr_debug("Total of %d queues are accountable so far\n",
1299                         dqm->total_queue_count);
1300
1301         dqm_unlock(dqm);
1302         return retval;
1303
1304 out_deallocate_doorbell:
1305         deallocate_doorbell(qpd, q);
1306 out_deallocate_sdma_queue:
1307         if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1308                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1309                 dqm_lock(dqm);
1310                 deallocate_sdma_queue(dqm, q);
1311                 dqm_unlock(dqm);
1312         }
1313 out:
1314         return retval;
1315 }
1316
1317 int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
1318                                 unsigned int fence_value,
1319                                 unsigned int timeout_ms)
1320 {
1321         unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
1322
1323         while (*fence_addr != fence_value) {
1324                 if (time_after(jiffies, end_jiffies)) {
1325                         pr_err("qcm fence wait loop timeout expired\n");
1326                         /* In HWS case, this is used to halt the driver thread
1327                          * in order not to mess up CP states before doing
1328                          * scandumps for FW debugging.
1329                          */
1330                         while (halt_if_hws_hang)
1331                                 schedule();
1332
1333                         return -ETIME;
1334                 }
1335                 schedule();
1336         }
1337
1338         return 0;
1339 }
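/*
 * Illustrative sketch, not part of this file: the fence handshake that
 * unmap_queues_cpsch() below builds on amdkfd_fence_wait_timeout().
 * The driver arms the fence with an init value, asks the CP to write a
 * completion value via a query-status packet, then polls with a
 * timeout.  This user-space analogue replaces jiffies with a poll
 * budget and simulates the CP write; all values here are made up.
 */
#include <stdio.h>

#define EXAMPLE_FENCE_INIT	1u
#define EXAMPLE_FENCE_COMPLETED	2u
#define EXAMPLE_MAX_POLLS	1000000u

static volatile unsigned int example_fence;

static int example_fence_wait(unsigned int value)
{
	unsigned int polls;

	for (polls = 0; polls < EXAMPLE_MAX_POLLS; polls++)
		if (example_fence == value)
			return 0;
	return -1;	/* timed out: the real code treats this as a HWS hang */
}

int main(void)
{
	example_fence = EXAMPLE_FENCE_INIT;		/* step 1: arm the fence */
	example_fence = EXAMPLE_FENCE_COMPLETED;	/* step 2: stand-in for the CP write */
	printf("wait after CP write: %d\n", example_fence_wait(EXAMPLE_FENCE_COMPLETED));

	example_fence = EXAMPLE_FENCE_INIT;		/* no CP write this time */
	printf("wait with no CP write: %d\n", example_fence_wait(EXAMPLE_FENCE_COMPLETED));
	return 0;
}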
1340
1341 /* dqm->lock mutex has to be locked before calling this function */
1342 static int map_queues_cpsch(struct device_queue_manager *dqm)
1343 {
1344         int retval;
1345
1346         if (!dqm->sched_running)
1347                 return 0;
1348         if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
1349                 return 0;
1350         if (dqm->active_runlist)
1351                 return 0;
1352
1353         retval = pm_send_runlist(&dqm->packets, &dqm->queues);
1354         pr_debug("%s sent runlist\n", __func__);
1355         if (retval) {
1356                 pr_err("failed to execute runlist\n");
1357                 return retval;
1358         }
1359         dqm->active_runlist = true;
1360
1361         return retval;
1362 }
1363
1364 /* dqm->lock mutex has to be locked before calling this function */
1365 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
1366                                 enum kfd_unmap_queues_filter filter,
1367                                 uint32_t filter_param)
1368 {
1369         int retval = 0;
1370
1371         if (!dqm->sched_running)
1372                 return 0;
1373         if (dqm->is_hws_hang)
1374                 return -EIO;
1375         if (!dqm->active_runlist)
1376                 return retval;
1377
1378         retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
1379                         filter, filter_param, false, 0);
1380         if (retval)
1381                 return retval;
1382
1383         *dqm->fence_addr = KFD_FENCE_INIT;
1384         pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
1385                                 KFD_FENCE_COMPLETED);
1386         /* wait for the unmap fence, bounded by queue_preemption_timeout_ms */
1387         retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
1388                                 queue_preemption_timeout_ms);
1389         if (retval) {
1390                 pr_err("The CP might be in an unrecoverable state due to an unsuccessful queue preemption\n");
1391                 dqm->is_hws_hang = true;
1392                 /* It's possible we're detecting a HWS hang in the
1393                  * middle of a GPU reset. No need to schedule another
1394                  * reset in this case.
1395                  */
1396                 if (!dqm->is_resetting)
1397                         schedule_work(&dqm->hw_exception_work);
1398                 return retval;
1399         }
1400
1401         pm_release_ib(&dqm->packets);
1402         dqm->active_runlist = false;
1403
1404         return retval;
1405 }
1406
1407 /* dqm->lock mutex has to be locked before calling this function */
1408 static int execute_queues_cpsch(struct device_queue_manager *dqm,
1409                                 enum kfd_unmap_queues_filter filter,
1410                                 uint32_t filter_param)
1411 {
1412         int retval;
1413
1414         if (dqm->is_hws_hang)
1415                 return -EIO;
1416         retval = unmap_queues_cpsch(dqm, filter, filter_param);
1417         if (retval)
1418                 return retval;
1419
1420         return map_queues_cpsch(dqm);
1421 }
1422
1423 static int destroy_queue_cpsch(struct device_queue_manager *dqm,
1424                                 struct qcm_process_device *qpd,
1425                                 struct queue *q)
1426 {
1427         int retval;
1428         struct mqd_manager *mqd_mgr;
1429
1430         retval = 0;
1431
1432         /* remove queue from list to prevent rescheduling after preemption */
1433         dqm_lock(dqm);
1434
1435         if (qpd->is_debug) {
1436                 /*
1437                  * error: we currently do not allow destroying a queue
1438                  * of a process that is being debugged
1439                  */
1440                 retval = -EBUSY;
1441                 goto failed_try_destroy_debugged_queue;
1442
1443         }
1444
1445         mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1446                         q->properties.type)];
1447
1448         deallocate_doorbell(qpd, q);
1449
1450         if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1451             q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1452                 deallocate_sdma_queue(dqm, q);
1454
1455         list_del(&q->list);
1456         qpd->queue_count--;
1457         if (q->properties.is_active) {
1458                 decrement_queue_count(dqm, q->properties.type);
1459                 retval = execute_queues_cpsch(dqm,
1460                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1461                 if (retval == -ETIME)
1462                         qpd->reset_wavefronts = true;
1463                 if (q->properties.is_gws) {
1464                         dqm->gws_queue_count--;
1465                         qpd->mapped_gws_queue = false;
1466                 }
1467         }
1468
1469         /*
1470          * Unconditionally decrement this counter, regardless of the queue's
1471          * type
1472          */
1473         dqm->total_queue_count--;
1474         pr_debug("Total of %d queues are accountable so far\n",
1475                         dqm->total_queue_count);
1476
1477         dqm_unlock(dqm);
1478
1479         /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
1480         mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1481
1482         return retval;
1483
1484 failed_try_destroy_debugged_queue:
1485
1486         dqm_unlock(dqm);
1487         return retval;
1488 }
1489
1490 /*
1491  * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
1492  * stay in user mode.
1493  */
1494 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
1495 /* APE1 limit is inclusive and 64K aligned. */
1496 #define APE1_LIMIT_ALIGNMENT 0xFFFF
1497
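/*
 * For example, alternate_aperture_base = 0x1_0000_0000 (4 GiB) with
 * alternate_aperture_size = 0x10000 (64 KiB) gives limit = 0x1_0000_FFFF,
 * which passes the fixed-bits checks and is programmed as
 * sh_mem_ape1_base = 0x10000 and sh_mem_ape1_limit = 0x1000F.
 */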
1498 static bool set_cache_memory_policy(struct device_queue_manager *dqm,
1499                                    struct qcm_process_device *qpd,
1500                                    enum cache_policy default_policy,
1501                                    enum cache_policy alternate_policy,
1502                                    void __user *alternate_aperture_base,
1503                                    uint64_t alternate_aperture_size)
1504 {
1505         bool retval = true;
1506
1507         if (!dqm->asic_ops.set_cache_memory_policy)
1508                 return retval;
1509
1510         dqm_lock(dqm);
1511
1512         if (alternate_aperture_size == 0) {
1513                 /* base > limit disables APE1 */
1514                 qpd->sh_mem_ape1_base = 1;
1515                 qpd->sh_mem_ape1_limit = 0;
1516         } else {
1517                 /*
1518                  * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
1519                  *                      SH_MEM_APE1_BASE[31:0], 0x0000 }
1520                  * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
1521                  *                      SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
1522                  * Verify that the base and size parameters can be
1523                  * represented in this format and convert them.
1524                  * Additionally restrict APE1 to user-mode addresses.
1525                  */
1526
1527                 uint64_t base = (uintptr_t)alternate_aperture_base;
1528                 uint64_t limit = base + alternate_aperture_size - 1;
1529
1530                 if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
1531                    (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
1532                         retval = false;
1533                         goto out;
1534                 }
1535
1536                 qpd->sh_mem_ape1_base = base >> 16;
1537                 qpd->sh_mem_ape1_limit = limit >> 16;
1538         }
1539
1540         retval = dqm->asic_ops.set_cache_memory_policy(
1541                         dqm,
1542                         qpd,
1543                         default_policy,
1544                         alternate_policy,
1545                         alternate_aperture_base,
1546                         alternate_aperture_size);
1547
1548         if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
1549                 program_sh_mem_settings(dqm, qpd);
1550
1551         pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
1552                 qpd->sh_mem_config, qpd->sh_mem_ape1_base,
1553                 qpd->sh_mem_ape1_limit);
1554
1555 out:
1556         dqm_unlock(dqm);
1557         return retval;
1558 }
1559
1560 static int set_trap_handler(struct device_queue_manager *dqm,
1561                                 struct qcm_process_device *qpd,
1562                                 uint64_t tba_addr,
1563                                 uint64_t tma_addr)
1564 {
1565         uint64_t *tma;
1566
1567         if (dqm->dev->cwsr_enabled) {
1568                 /* Jump from CWSR trap handler to user trap */
1569                 tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
1570                 tma[0] = tba_addr;
1571                 tma[1] = tma_addr;
1572         } else {
1573                 qpd->tba_addr = tba_addr;
1574                 qpd->tma_addr = tma_addr;
1575         }
1576
1577         return 0;
1578 }
1579
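/*
 * Tear down every user-mode queue of a process under the no-HWS policy and
 * remove the process from the DQM's process list; the compute-active count
 * is decremented outside dqm->lock.
 */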
1580 static int process_termination_nocpsch(struct device_queue_manager *dqm,
1581                 struct qcm_process_device *qpd)
1582 {
1583         struct queue *q, *next;
1584         struct device_process_node *cur, *next_dpn;
1585         int retval = 0;
1586         bool found = false;
1587
1588         dqm_lock(dqm);
1589
1590         /* Clear all user mode queues */
1591         list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1592                 int ret;
1593
1594                 ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
1595                 if (ret)
1596                         retval = ret;
1597         }
1598
1599         /* Unregister process */
1600         list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1601                 if (qpd == cur->qpd) {
1602                         list_del(&cur->list);
1603                         kfree(cur);
1604                         dqm->processes_count--;
1605                         found = true;
1606                         break;
1607                 }
1608         }
1609
1610         dqm_unlock(dqm);
1611
1612         /* Outside the DQM lock because under the DQM lock we can't do
1613          * reclaim or take other locks that others hold while reclaiming.
1614          */
1615         if (found)
1616                 kfd_dec_compute_active(dqm->dev);
1617
1618         return retval;
1619 }
1620
1621 static int get_wave_state(struct device_queue_manager *dqm,
1622                           struct queue *q,
1623                           void __user *ctl_stack,
1624                           u32 *ctl_stack_used_size,
1625                           u32 *save_area_used_size)
1626 {
1627         struct mqd_manager *mqd_mgr;
1628         int r;
1629
1630         dqm_lock(dqm);
1631
1632         if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
1633             q->properties.is_active || !q->device->cwsr_enabled) {
1634                 r = -EINVAL;
1635                 goto dqm_unlock;
1636         }
1637
1638         mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
1639
1640         if (!mqd_mgr->get_wave_state) {
1641                 r = -EINVAL;
1642                 goto dqm_unlock;
1643         }
1644
1645         r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
1646                         ctl_stack_used_size, save_area_used_size);
1647
1648 dqm_unlock:
1649         dqm_unlock(dqm);
1650         return r;
1651 }
1652
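/*
 * Tear down a process under HWS scheduling: drop its kernel queues, release
 * SDMA resources and queue counters for its user queues, unregister the
 * process, unmap everything still on the runlist (resetting wavefronts if
 * preemption failed), and finally free the MQDs outside dqm->lock.
 */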
1653 static int process_termination_cpsch(struct device_queue_manager *dqm,
1654                 struct qcm_process_device *qpd)
1655 {
1656         int retval;
1657         struct queue *q, *next;
1658         struct kernel_queue *kq, *kq_next;
1659         struct mqd_manager *mqd_mgr;
1660         struct device_process_node *cur, *next_dpn;
1661         enum kfd_unmap_queues_filter filter =
1662                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
1663         bool found = false;
1664
1665         retval = 0;
1666
1667         dqm_lock(dqm);
1668
1669         /* Clean all kernel queues */
1670         list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
1671                 list_del(&kq->list);
1672                 decrement_queue_count(dqm, kq->queue->properties.type);
1673                 qpd->is_debug = false;
1674                 dqm->total_queue_count--;
1675                 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
1676         }
1677
1678         /* Clear all user mode queues */
1679         list_for_each_entry(q, &qpd->queues_list, list) {
1680                 if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1681                     q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1682                         deallocate_sdma_queue(dqm, q);
1684
1685                 if (q->properties.is_active) {
1686                         decrement_queue_count(dqm, q->properties.type);
1687                         if (q->properties.is_gws) {
1688                                 dqm->gws_queue_count--;
1689                                 qpd->mapped_gws_queue = false;
1690                         }
1691                 }
1692
1693                 dqm->total_queue_count--;
1694         }
1695
1696         /* Unregister process */
1697         list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1698                 if (qpd == cur->qpd) {
1699                         list_del(&cur->list);
1700                         kfree(cur);
1701                         dqm->processes_count--;
1702                         found = true;
1703                         break;
1704                 }
1705         }
1706
1707         retval = execute_queues_cpsch(dqm, filter, 0);
1708         if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
1709                 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
1710                 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
1711                 qpd->reset_wavefronts = false;
1712         }
1713
1714         dqm_unlock(dqm);
1715
1716         /* Outside the DQM lock because under the DQM lock we can't do
1717          * reclaim or take other locks that others hold while reclaiming.
1718          */
1719         if (found)
1720                 kfd_dec_compute_active(dqm->dev);
1721
1722         /* Lastly, free mqd resources.
1723          * Do free_mqd() after dqm_unlock to avoid circular locking.
1724          */
1725         list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1726                 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1727                                 q->properties.type)];
1728                 list_del(&q->list);
1729                 qpd->queue_count--;
1730                 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1731         }
1732
1733         return retval;
1734 }
1735
1736 static int init_mqd_managers(struct device_queue_manager *dqm)
1737 {
1738         int i, j;
1739         struct mqd_manager *mqd_mgr;
1740
1741         for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
1742                 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
1743                 if (!mqd_mgr) {
1744                         pr_err("mqd manager [%d] initialization failed\n", i);
1745                         goto out_free;
1746                 }
1747                 dqm->mqd_mgrs[i] = mqd_mgr;
1748         }
1749
1750         return 0;
1751
1752 out_free:
1753         for (j = 0; j < i; j++) {
1754                 kfree(dqm->mqd_mgrs[j]);
1755                 dqm->mqd_mgrs[j] = NULL;
1756         }
1757
1758         return -ENOMEM;
1759 }
1760
1761 /* Allocate one HIQ MQD (for HWS) and all SDMA MQDs in one contiguous chunk */
1762 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
1763 {
1764         int retval;
1765         struct kfd_dev *dev = dqm->dev;
1766         struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
1767         uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
1768                 get_num_all_sdma_engines(dqm) *
1769                 dev->device_info->num_sdma_queues_per_engine +
1770                 dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
1771
1772         retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
1773                 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
1774                 (void *)&(mem_obj->cpu_ptr), false);
1775
1776         return retval;
1777 }
1778
1779 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1780 {
1781         struct device_queue_manager *dqm;
1782
1783         pr_debug("Loading device queue manager\n");
1784
1785         dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
1786         if (!dqm)
1787                 return NULL;
1788
1789         switch (dev->device_info->asic_family) {
1790         /* HWS is not available on Hawaii. */
1791         case CHIP_HAWAII:
1792         /* HWS depends on CWSR for timely dequeue. CWSR is not
1793          * available on Tonga.
1794          *
1795          * FIXME: This argument also applies to Kaveri.
1796          */
1797         case CHIP_TONGA:
1798                 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
1799                 break;
1800         default:
1801                 dqm->sched_policy = sched_policy;
1802                 break;
1803         }
1804
1805         dqm->dev = dev;
1806         switch (dqm->sched_policy) {
1807         case KFD_SCHED_POLICY_HWS:
1808         case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
1809                 /* initialize dqm for cp scheduling */
1810                 dqm->ops.create_queue = create_queue_cpsch;
1811                 dqm->ops.initialize = initialize_cpsch;
1812                 dqm->ops.start = start_cpsch;
1813                 dqm->ops.stop = stop_cpsch;
1814                 dqm->ops.pre_reset = pre_reset;
1815                 dqm->ops.destroy_queue = destroy_queue_cpsch;
1816                 dqm->ops.update_queue = update_queue;
1817                 dqm->ops.register_process = register_process;
1818                 dqm->ops.unregister_process = unregister_process;
1819                 dqm->ops.uninitialize = uninitialize;
1820                 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
1821                 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
1822                 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1823                 dqm->ops.set_trap_handler = set_trap_handler;
1824                 dqm->ops.process_termination = process_termination_cpsch;
1825                 dqm->ops.evict_process_queues = evict_process_queues_cpsch;
1826                 dqm->ops.restore_process_queues = restore_process_queues_cpsch;
1827                 dqm->ops.get_wave_state = get_wave_state;
1828                 break;
1829         case KFD_SCHED_POLICY_NO_HWS:
1830                 /* initialize dqm for no cp scheduling */
1831                 dqm->ops.start = start_nocpsch;
1832                 dqm->ops.stop = stop_nocpsch;
1833                 dqm->ops.pre_reset = pre_reset;
1834                 dqm->ops.create_queue = create_queue_nocpsch;
1835                 dqm->ops.destroy_queue = destroy_queue_nocpsch;
1836                 dqm->ops.update_queue = update_queue;
1837                 dqm->ops.register_process = register_process;
1838                 dqm->ops.unregister_process = unregister_process;
1839                 dqm->ops.initialize = initialize_nocpsch;
1840                 dqm->ops.uninitialize = uninitialize;
1841                 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1842                 dqm->ops.set_trap_handler = set_trap_handler;
1843                 dqm->ops.process_termination = process_termination_nocpsch;
1844                 dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
1845                 dqm->ops.restore_process_queues =
1846                         restore_process_queues_nocpsch;
1847                 dqm->ops.get_wave_state = get_wave_state;
1848                 break;
1849         default:
1850                 pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
1851                 goto out_free;
1852         }
1853
1854         switch (dev->device_info->asic_family) {
1855         case CHIP_CARRIZO:
1856                 device_queue_manager_init_vi(&dqm->asic_ops);
1857                 break;
1858
1859         case CHIP_KAVERI:
1860                 device_queue_manager_init_cik(&dqm->asic_ops);
1861                 break;
1862
1863         case CHIP_HAWAII:
1864                 device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
1865                 break;
1866
1867         case CHIP_TONGA:
1868         case CHIP_FIJI:
1869         case CHIP_POLARIS10:
1870         case CHIP_POLARIS11:
1871         case CHIP_POLARIS12:
1872         case CHIP_VEGAM:
1873                 device_queue_manager_init_vi_tonga(&dqm->asic_ops);
1874                 break;
1875
1876         case CHIP_VEGA10:
1877         case CHIP_VEGA12:
1878         case CHIP_VEGA20:
1879         case CHIP_RAVEN:
1880         case CHIP_RENOIR:
1881         case CHIP_ARCTURUS:
1882                 device_queue_manager_init_v9(&dqm->asic_ops);
1883                 break;
1884         case CHIP_NAVI10:
1885         case CHIP_NAVI12:
1886         case CHIP_NAVI14:
1887                 device_queue_manager_init_v10_navi10(&dqm->asic_ops);
1888                 break;
1889         default:
1890                 WARN(1, "Unexpected ASIC family %u",
1891                      dev->device_info->asic_family);
1892                 goto out_free;
1893         }
1894
1895         if (init_mqd_managers(dqm))
1896                 goto out_free;
1897
1898         if (allocate_hiq_sdma_mqd(dqm)) {
1899                 pr_err("Failed to allocate hiq sdma mqd chunk buffer\n");
1900                 goto out_free;
1901         }
1902
1903         if (!dqm->ops.initialize(dqm))
1904                 return dqm;
1905
             /* ops.initialize() failed: free the HIQ/SDMA MQD chunk allocated above */
             amdgpu_amdkfd_free_gtt_mem(dev->kgd, dqm->hiq_sdma_mqd.gtt_mem);
1906 out_free:
1907         kfree(dqm);
1908         return NULL;
1909 }
1910
1911 static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
1912                                     struct kfd_mem_obj *mqd)
1913 {
1914         WARN(!mqd, "No hiq sdma mqd chunk to free");
1915
1916         amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
1917 }
1918
1919 void device_queue_manager_uninit(struct device_queue_manager *dqm)
1920 {
1921         dqm->ops.uninitialize(dqm);
1922         deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
1923         kfree(dqm);
1924 }
1925
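/*
 * Called on a GPU VM fault: look up the faulting process by PASID and evict
 * all of its queues on this device.
 */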
1926 int kfd_process_vm_fault(struct device_queue_manager *dqm,
1927                          unsigned int pasid)
1928 {
1929         struct kfd_process_device *pdd;
1930         struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
1931         int ret = 0;
1932
1933         if (!p)
1934                 return -EINVAL;
1935         pdd = kfd_get_process_device_data(dqm->dev, p);
1936         if (pdd)
1937                 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
1938         kfd_unref_process(p);
1939
1940         return ret;
1941 }
1942
1943 static void kfd_process_hw_exception(struct work_struct *work)
1944 {
1945         struct device_queue_manager *dqm = container_of(work,
1946                         struct device_queue_manager, hw_exception_work);
1947         amdgpu_amdkfd_gpu_reset(dqm->dev->kgd);
1948 }
1949
1950 #if defined(CONFIG_DEBUG_FS)
1951
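/*
 * Print a register dump as "offset: value value ...", starting a new line
 * whenever the register offsets stop being contiguous or after eight values
 * on the current line (the first value plus count = 7 continuations).
 */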
1952 static void seq_reg_dump(struct seq_file *m,
1953                          uint32_t (*dump)[2], uint32_t n_regs)
1954 {
1955         uint32_t i, count;
1956
1957         for (i = 0, count = 0; i < n_regs; i++) {
1958                 if (count == 0 ||
1959                     dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
1960                         seq_printf(m, "%s    %08x: %08x",
1961                                    i ? "\n" : "",
1962                                    dump[i][0], dump[i][1]);
1963                         count = 7;
1964                 } else {
1965                         seq_printf(m, " %08x", dump[i][1]);
1966                         count--;
1967                 }
1968         }
1969
1970         seq_puts(m, "\n");
1971 }
1972
1973 int dqm_debugfs_hqds(struct seq_file *m, void *data)
1974 {
1975         struct device_queue_manager *dqm = data;
1976         uint32_t (*dump)[2], n_regs;
1977         int pipe, queue;
1978         int r = 0;
1979
1980         if (!dqm->sched_running) {
1981                 seq_puts(m, " Device is stopped\n");
1982
1983                 return 0;
1984         }
1985
1986         r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
1987                                         KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
1988                                         &dump, &n_regs);
1989         if (!r) {
1990                 seq_printf(m, "  HIQ on MEC %d Pipe %d Queue %d\n",
1991                            KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
1992                            KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
1993                            KFD_CIK_HIQ_QUEUE);
1994                 seq_reg_dump(m, dump, n_regs);
1995
1996                 kfree(dump);
1997         }
1998
1999         for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
2000                 int pipe_offset = pipe * get_queues_per_pipe(dqm);
2001
2002                 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
2003                         if (!test_bit(pipe_offset + queue,
2004                                       dqm->dev->shared_resources.cp_queue_bitmap))
2005                                 continue;
2006
2007                         r = dqm->dev->kfd2kgd->hqd_dump(
2008                                 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
2009                         if (r)
2010                                 break;
2011
2012                         seq_printf(m, "  CP Pipe %d, Queue %d\n",
2013                                   pipe, queue);
2014                         seq_reg_dump(m, dump, n_regs);
2015
2016                         kfree(dump);
2017                 }
2018         }
2019
2020         for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
2021                 for (queue = 0;
2022                      queue < dqm->dev->device_info->num_sdma_queues_per_engine;
2023                      queue++) {
2024                         r = dqm->dev->kfd2kgd->hqd_sdma_dump(
2025                                 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
2026                         if (r)
2027                                 break;
2028
2029                         seq_printf(m, "  SDMA Engine %d, RLC %d\n",
2030                                   pipe, queue);
2031                         seq_reg_dump(m, dump, n_regs);
2032
2033                         kfree(dump);
2034                 }
2035         }
2036
2037         return r;
2038 }
2039
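/*
 * Debugfs hook to force a full unmap/remap of all queues. active_runlist is
 * set first so that unmap_queues_cpsch does not skip the unmap step even if
 * no runlist is currently believed to be active.
 */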
2040 int dqm_debugfs_execute_queues(struct device_queue_manager *dqm)
2041 {
2042         int r = 0;
2043
2044         dqm_lock(dqm);
2045         dqm->active_runlist = true;
2046         r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
2047         dqm_unlock(dqm);
2048
2049         return r;
2050 }
2051
2052 #endif