drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/ratelimit.h>
25 #include <linux/printk.h>
26 #include <linux/slab.h>
27 #include <linux/list.h>
28 #include <linux/types.h>
29 #include <linux/bitops.h>
30 #include <linux/sched.h>
31 #include "kfd_priv.h"
32 #include "kfd_device_queue_manager.h"
33 #include "kfd_mqd_manager.h"
34 #include "cik_regs.h"
35 #include "kfd_kernel_queue.h"
36 #include "amdgpu_amdkfd.h"
37
38 /* Size of the per-pipe EOP queue */
39 #define CIK_HPD_EOP_BYTES_LOG2 11
40 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
41
42 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
43                                         unsigned int pasid, unsigned int vmid);
44
45 static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
46                                         struct queue *q,
47                                         struct qcm_process_device *qpd);
48
49 static int execute_queues_cpsch(struct device_queue_manager *dqm,
50                                 enum kfd_unmap_queues_filter filter,
51                                 uint32_t filter_param);
52 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
53                                 enum kfd_unmap_queues_filter filter,
54                                 uint32_t filter_param);
55
56 static int map_queues_cpsch(struct device_queue_manager *dqm);
57
58 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
59                                         struct queue *q,
60                                         struct qcm_process_device *qpd);
61
62 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
63                                 unsigned int sdma_queue_id);
64
65 static void kfd_process_hw_exception(struct work_struct *work);
66
67 static inline
68 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
69 {
70         if (type == KFD_QUEUE_TYPE_SDMA)
71                 return KFD_MQD_TYPE_SDMA;
72         return KFD_MQD_TYPE_CP;
73 }
74
75 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
76 {
77         int i;
78         int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
79                 + pipe * dqm->dev->shared_resources.num_queue_per_pipe;
80
81         /* queue is available for KFD usage if bit is 1 */
82         for (i = 0; i <  dqm->dev->shared_resources.num_queue_per_pipe; ++i)
83                 if (test_bit(pipe_offset + i,
84                               dqm->dev->shared_resources.queue_bitmap))
85                         return true;
86         return false;
87 }
88
89 unsigned int get_queues_num(struct device_queue_manager *dqm)
90 {
91         return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
92                                 KGD_MAX_QUEUES);
93 }
94
95 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
96 {
97         return dqm->dev->shared_resources.num_queue_per_pipe;
98 }
99
100 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
101 {
102         return dqm->dev->shared_resources.num_pipe_per_mec;
103 }
104
105 static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
106 {
107         return dqm->dev->device_info->num_sdma_engines;
108 }
109
110 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
111 {
112         return dqm->dev->device_info->num_sdma_engines
113                         * dqm->dev->device_info->num_sdma_queues_per_engine;
114 }
115
116 void program_sh_mem_settings(struct device_queue_manager *dqm,
117                                         struct qcm_process_device *qpd)
118 {
119         return dqm->dev->kfd2kgd->program_sh_mem_settings(
120                                                 dqm->dev->kgd, qpd->vmid,
121                                                 qpd->sh_mem_config,
122                                                 qpd->sh_mem_ape1_base,
123                                                 qpd->sh_mem_ape1_limit,
124                                                 qpd->sh_mem_bases);
125 }
126
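/* Assign a doorbell ID to the queue. Pre-SOC15 ASICs reuse the queue ID to
 * preserve the user mode ABI. On SOC15, SDMA queues get a static ID derived
 * from the engine and queue pair (see the formula below), while CP queues
 * take the first free bit in the per-process doorbell bitmap. As a rough
 * example of the SDMA case, assuming idx_offset[0] == 0 and the 512-doorbell
 * mirror distance noted below, engine 0 / queue 3 maps to 0 + 1 * 512 + 1 =
 * doorbell 513.
 */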
127 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
128 {
129         struct kfd_dev *dev = qpd->dqm->dev;
130
131         if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
132                 /* On pre-SOC15 chips we need to use the queue ID to
133                  * preserve the user mode ABI.
134                  */
135                 q->doorbell_id = q->properties.queue_id;
136         } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
137                 /* For SDMA queues on SOC15 with 8-byte doorbells, use static
138                  * doorbell assignments based on the engine and queue id.
139                  * The doorbell index distance between RLC (2*i) and (2*i+1)
140                  * for an SDMA engine is 512.
141                  */
142                 uint32_t *idx_offset =
143                                 dev->shared_resources.sdma_doorbell_idx;
144
145                 q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
146                         + (q->properties.sdma_queue_id & 1)
147                         * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
148                         + (q->properties.sdma_queue_id >> 1);
149         } else {
150                 /* For CP queues on SOC15 reserve a free doorbell ID */
151                 unsigned int found;
152
153                 found = find_first_zero_bit(qpd->doorbell_bitmap,
154                                             KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
155                 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
156                         pr_debug("No doorbells available");
157                         return -EBUSY;
158                 }
159                 set_bit(found, qpd->doorbell_bitmap);
160                 q->doorbell_id = found;
161         }
162
163         q->properties.doorbell_off =
164                 kfd_doorbell_id_to_offset(dev, q->process,
165                                           q->doorbell_id);
166
167         return 0;
168 }
169
170 static void deallocate_doorbell(struct qcm_process_device *qpd,
171                                 struct queue *q)
172 {
173         unsigned int old;
174         struct kfd_dev *dev = qpd->dqm->dev;
175
176         if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
177             q->properties.type == KFD_QUEUE_TYPE_SDMA)
178                 return;
179
180         old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
181         WARN_ON(!old);
182 }
183
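/* Allocate a VMID for the process on the no-HWS path: claim the lowest set
 * bit of dqm->vmid_bitmap, offset it by first_vmid_kfd to get the hardware
 * VMID, program the PASID-VMID mapping, SH_MEM settings and page table base,
 * then flush the TLB so the new mapping takes effect.
 */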
184 static int allocate_vmid(struct device_queue_manager *dqm,
185                         struct qcm_process_device *qpd,
186                         struct queue *q)
187 {
188         int bit, allocated_vmid;
189
190         if (dqm->vmid_bitmap == 0)
191                 return -ENOMEM;
192
193         bit = ffs(dqm->vmid_bitmap) - 1;
194         dqm->vmid_bitmap &= ~(1 << bit);
195
196         allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
197         pr_debug("vmid allocation %d\n", allocated_vmid);
198         qpd->vmid = allocated_vmid;
199         q->properties.vmid = allocated_vmid;
200
201         set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
202         program_sh_mem_settings(dqm, qpd);
203
204         /* qpd->page_table_base is set earlier when register_process()
205          * is called, i.e. when the first queue is created.
206          */
207         dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
208                         qpd->vmid,
209                         qpd->page_table_base);
210         /* invalidate the VM context after pasid and vmid mapping is set up */
211         kfd_flush_tlb(qpd_to_pdd(qpd));
212
213         return 0;
214 }
215
216 static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
217                                 struct qcm_process_device *qpd)
218 {
219         const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
220         int ret;
221
222         if (!qpd->ib_kaddr)
223                 return -ENOMEM;
224
225         ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
226         if (ret)
227                 return ret;
228
229         return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
230                                 qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
231                                 pmf->release_mem_size / sizeof(uint32_t));
232 }
233
234 static void deallocate_vmid(struct device_queue_manager *dqm,
235                                 struct qcm_process_device *qpd,
236                                 struct queue *q)
237 {
238         int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
239
240         /* On GFX v7, CP doesn't flush TC at dequeue */
241         if (q->device->device_info->asic_family == CHIP_HAWAII)
242                 if (flush_texture_cache_nocpsch(q->device, qpd))
243                         pr_err("Failed to flush TC\n");
244
245         kfd_flush_tlb(qpd_to_pdd(qpd));
246
247         /* Release the vmid mapping */
248         set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
249
250         dqm->vmid_bitmap |= (1 << bit);
251         qpd->vmid = 0;
252         q->properties.vmid = 0;
253 }
254
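/* Create a user mode queue without the HW scheduler. The first queue of a
 * process also allocates its VMID; compute queues are loaded directly into
 * an HQD slot and SDMA queues onto an SDMA engine, with the per-DQM counters
 * updated under the DQM lock.
 */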
255 static int create_queue_nocpsch(struct device_queue_manager *dqm,
256                                 struct queue *q,
257                                 struct qcm_process_device *qpd)
258 {
259         int retval;
260
261         print_queue(q);
262
263         dqm_lock(dqm);
264
265         if (dqm->total_queue_count >= max_num_of_queues_per_device) {
266                 pr_warn("Can't create new usermode queue because %d queues were already created\n",
267                                 dqm->total_queue_count);
268                 retval = -EPERM;
269                 goto out_unlock;
270         }
271
272         if (list_empty(&qpd->queues_list)) {
273                 retval = allocate_vmid(dqm, qpd, q);
274                 if (retval)
275                         goto out_unlock;
276         }
277         q->properties.vmid = qpd->vmid;
278         /*
279          * Eviction state logic: we only mark active queues as evicted
280          * to avoid the overhead of restoring inactive queues later
281          */
282         if (qpd->evicted)
283                 q->properties.is_evicted = (q->properties.queue_size > 0 &&
284                                             q->properties.queue_percent > 0 &&
285                                             q->properties.queue_address != 0);
286
287         q->properties.tba_addr = qpd->tba_addr;
288         q->properties.tma_addr = qpd->tma_addr;
289
290         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
291                 retval = create_compute_queue_nocpsch(dqm, q, qpd);
292         else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
293                 retval = create_sdma_queue_nocpsch(dqm, q, qpd);
294         else
295                 retval = -EINVAL;
296
297         if (retval) {
298                 if (list_empty(&qpd->queues_list))
299                         deallocate_vmid(dqm, qpd, q);
300                 goto out_unlock;
301         }
302
303         list_add(&q->list, &qpd->queues_list);
304         qpd->queue_count++;
305         if (q->properties.is_active)
306                 dqm->queue_count++;
307
308         if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
309                 dqm->sdma_queue_count++;
310
311         /*
312          * Unconditionally increment this counter, regardless of the queue's
313          * type or whether the queue is active.
314          */
315         dqm->total_queue_count++;
316         pr_debug("Total of %d queues are accountable so far\n",
317                         dqm->total_queue_count);
318
319 out_unlock:
320         dqm_unlock(dqm);
321         return retval;
322 }
323
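/* Reserve an HQD slot (pipe, queue) for a compute queue. Pipes are scanned
 * round-robin starting at next_pipe_to_allocate and the lowest free queue
 * bit on the first enabled pipe is claimed, spreading queues horizontally
 * across pipes.
 */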
324 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
325 {
326         bool set;
327         int pipe, bit, i;
328
329         set = false;
330
331         for (pipe = dqm->next_pipe_to_allocate, i = 0;
332                         i < get_pipes_per_mec(dqm);
333                         pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
334
335                 if (!is_pipe_enabled(dqm, 0, pipe))
336                         continue;
337
338                 if (dqm->allocated_queues[pipe] != 0) {
339                         bit = ffs(dqm->allocated_queues[pipe]) - 1;
340                         dqm->allocated_queues[pipe] &= ~(1 << bit);
341                         q->pipe = pipe;
342                         q->queue = bit;
343                         set = true;
344                         break;
345                 }
346         }
347
348         if (!set)
349                 return -EBUSY;
350
351         pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
352         /* horizontal hqd allocation */
353         dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
354
355         return 0;
356 }
357
358 static inline void deallocate_hqd(struct device_queue_manager *dqm,
359                                 struct queue *q)
360 {
361         dqm->allocated_queues[q->pipe] |= (1 << q->queue);
362 }
363
364 static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
365                                         struct queue *q,
366                                         struct qcm_process_device *qpd)
367 {
368         struct mqd_manager *mqd_mgr;
369         int retval;
370
371         mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
372         if (!mqd_mgr)
373                 return -ENOMEM;
374
375         retval = allocate_hqd(dqm, q);
376         if (retval)
377                 return retval;
378
379         retval = allocate_doorbell(qpd, q);
380         if (retval)
381                 goto out_deallocate_hqd;
382
383         retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
384                                 &q->gart_mqd_addr, &q->properties);
385         if (retval)
386                 goto out_deallocate_doorbell;
387
388         pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
389                         q->pipe, q->queue);
390
391         dqm->dev->kfd2kgd->set_scratch_backing_va(
392                         dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);
393
394         if (!q->properties.is_active)
395                 return 0;
396
397         if (WARN(q->process->mm != current->mm,
398                  "should only run in user thread"))
399                 retval = -EFAULT;
400         else
401                 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
402                                            &q->properties, current->mm);
403         if (retval)
404                 goto out_uninit_mqd;
405
406         return 0;
407
408 out_uninit_mqd:
409         mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
410 out_deallocate_doorbell:
411         deallocate_doorbell(qpd, q);
412 out_deallocate_hqd:
413         deallocate_hqd(dqm, q);
414
415         return retval;
416 }
417
418 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
419  * to avoid unsynchronized access
420  */
421 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
422                                 struct qcm_process_device *qpd,
423                                 struct queue *q)
424 {
425         int retval;
426         struct mqd_manager *mqd_mgr;
427
428         mqd_mgr = dqm->ops.get_mqd_manager(dqm,
429                 get_mqd_type_from_queue_type(q->properties.type));
430         if (!mqd_mgr)
431                 return -ENOMEM;
432
433         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
434                 deallocate_hqd(dqm, q);
435         } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
436                 dqm->sdma_queue_count--;
437                 deallocate_sdma_queue(dqm, q->sdma_id);
438         } else {
439                 pr_debug("q->properties.type %d is invalid\n",
440                                 q->properties.type);
441                 return -EINVAL;
442         }
443         dqm->total_queue_count--;
444
445         deallocate_doorbell(qpd, q);
446
447         retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
448                                 KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
449                                 KFD_UNMAP_LATENCY_MS,
450                                 q->pipe, q->queue);
451         if (retval == -ETIME)
452                 qpd->reset_wavefronts = true;
453
454         mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
455
456         list_del(&q->list);
457         if (list_empty(&qpd->queues_list)) {
458                 if (qpd->reset_wavefronts) {
459                         pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
460                                         dqm->dev);
461                         /* dbgdev_wave_reset_wavefronts has to be called before
462                          * deallocate_vmid(), i.e. when vmid is still in use.
463                          */
464                         dbgdev_wave_reset_wavefronts(dqm->dev,
465                                         qpd->pqm->process);
466                         qpd->reset_wavefronts = false;
467                 }
468
469                 deallocate_vmid(dqm, qpd, q);
470         }
471         qpd->queue_count--;
472         if (q->properties.is_active)
473                 dqm->queue_count--;
474
475         return retval;
476 }
477
478 static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
479                                 struct qcm_process_device *qpd,
480                                 struct queue *q)
481 {
482         int retval;
483
484         dqm_lock(dqm);
485         retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
486         dqm_unlock(dqm);
487
488         return retval;
489 }
490
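/* Apply changed queue properties to the MQD. The queue is unmapped (HWS) or
 * its HQD drained (no-HWS) first so the MQD can be rewritten safely,
 * dqm->queue_count is adjusted if the active state changed, and the queue is
 * then re-mapped or its MQD reloaded.
 */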
491 static int update_queue(struct device_queue_manager *dqm, struct queue *q)
492 {
493         int retval;
494         struct mqd_manager *mqd_mgr;
495         struct kfd_process_device *pdd;
496         bool prev_active = false;
497
498         dqm_lock(dqm);
499         pdd = kfd_get_process_device_data(q->device, q->process);
500         if (!pdd) {
501                 retval = -ENODEV;
502                 goto out_unlock;
503         }
504         mqd_mgr = dqm->ops.get_mqd_manager(dqm,
505                         get_mqd_type_from_queue_type(q->properties.type));
506         if (!mqd_mgr) {
507                 retval = -ENOMEM;
508                 goto out_unlock;
509         }
510         /*
511          * Eviction state logic: we only mark active queues as evicted
512          * to avoid the overhead of restoring inactive queues later
513          */
514         if (pdd->qpd.evicted)
515                 q->properties.is_evicted = (q->properties.queue_size > 0 &&
516                                             q->properties.queue_percent > 0 &&
517                                             q->properties.queue_address != 0);
518
519         /* Save previous activity state for counters */
520         prev_active = q->properties.is_active;
521
522         /* Make sure the queue is unmapped before updating the MQD */
523         if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
524                 retval = unmap_queues_cpsch(dqm,
525                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
526                 if (retval) {
527                         pr_err("unmap queue failed\n");
528                         goto out_unlock;
529                 }
530         } else if (prev_active &&
531                    (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
532                     q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
533                 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
534                                 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
535                                 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
536                 if (retval) {
537                         pr_err("destroy mqd failed\n");
538                         goto out_unlock;
539                 }
540         }
541
542         retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);
543
544         /*
545          * check active state vs. the previous state and modify
546          * counter accordingly. map_queues_cpsch uses the
547          * dqm->queue_count to determine whether a new runlist must be
548          * uploaded.
549          */
550         if (q->properties.is_active && !prev_active)
551                 dqm->queue_count++;
552         else if (!q->properties.is_active && prev_active)
553                 dqm->queue_count--;
554
555         if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
556                 retval = map_queues_cpsch(dqm);
557         else if (q->properties.is_active &&
558                  (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
559                   q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
560                 if (WARN(q->process->mm != current->mm,
561                          "should only run in user thread"))
562                         retval = -EFAULT;
563                 else
564                         retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
565                                                    q->pipe, q->queue,
566                                                    &q->properties, current->mm);
567         }
568
569 out_unlock:
570         dqm_unlock(dqm);
571         return retval;
572 }
573
574 static struct mqd_manager *get_mqd_manager(
575                 struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
576 {
577         struct mqd_manager *mqd_mgr;
578
579         if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
580                 return NULL;
581
582         pr_debug("mqd type %d\n", type);
583
584         mqd_mgr = dqm->mqd_mgrs[type];
585         if (!mqd_mgr) {
586                 mqd_mgr = mqd_manager_init(type, dqm->dev);
587                 if (!mqd_mgr)
588                         pr_err("mqd manager is NULL");
589                 dqm->mqd_mgrs[type] = mqd_mgr;
590         }
591
592         return mqd_mgr;
593 }
594
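/* Suspend all active queues of a process on the no-HWS path. qpd->evicted
 * acts as a reference count; only the first eviction actually preempts the
 * queues off their HQDs.
 */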
595 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
596                                         struct qcm_process_device *qpd)
597 {
598         struct queue *q;
599         struct mqd_manager *mqd_mgr;
600         struct kfd_process_device *pdd;
601         int retval = 0;
602
603         dqm_lock(dqm);
604         if (qpd->evicted++ > 0) /* already evicted, do nothing */
605                 goto out;
606
607         pdd = qpd_to_pdd(qpd);
608         pr_info_ratelimited("Evicting PASID %u queues\n",
609                             pdd->process->pasid);
610
611         /* Deactivate all active queues on the qpd */
612         list_for_each_entry(q, &qpd->queues_list, list) {
613                 if (!q->properties.is_active)
614                         continue;
615                 mqd_mgr = dqm->ops.get_mqd_manager(dqm,
616                         get_mqd_type_from_queue_type(q->properties.type));
617                 if (!mqd_mgr) { /* should not be here */
618                         pr_err("Cannot evict queue, mqd mgr is NULL\n");
619                         retval = -ENOMEM;
620                         goto out;
621                 }
622                 q->properties.is_evicted = true;
623                 q->properties.is_active = false;
624                 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
625                                 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
626                                 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
627                 if (retval)
628                         goto out;
629                 dqm->queue_count--;
630         }
631
632 out:
633         dqm_unlock(dqm);
634         return retval;
635 }
636
637 static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
638                                       struct qcm_process_device *qpd)
639 {
640         struct queue *q;
641         struct kfd_process_device *pdd;
642         int retval = 0;
643
644         dqm_lock(dqm);
645         if (qpd->evicted++ > 0) /* already evicted, do nothing */
646                 goto out;
647
648         pdd = qpd_to_pdd(qpd);
649         pr_info_ratelimited("Evicting PASID %u queues\n",
650                             pdd->process->pasid);
651
652         /* Deactivate all active queues on the qpd */
653         list_for_each_entry(q, &qpd->queues_list, list) {
654                 if (!q->properties.is_active)
655                         continue;
656                 q->properties.is_evicted = true;
657                 q->properties.is_active = false;
658                 dqm->queue_count--;
659         }
660         retval = execute_queues_cpsch(dqm,
661                                 qpd->is_debug ?
662                                 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
663                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
664
665 out:
666         dqm_unlock(dqm);
667         return retval;
668 }
669
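/* Undo a prior eviction on the no-HWS path. The eviction count is dropped;
 * on the last restore the updated page directory base is reprogrammed, the
 * TLB flushed, and each evicted queue's MQD reloaded using a safely taken
 * mm_struct reference.
 */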
670 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
671                                           struct qcm_process_device *qpd)
672 {
673         struct mm_struct *mm = NULL;
674         struct queue *q;
675         struct mqd_manager *mqd_mgr;
676         struct kfd_process_device *pdd;
677         uint64_t pd_base;
678         int retval = 0;
679
680         pdd = qpd_to_pdd(qpd);
681         /* Retrieve PD base */
682         pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
683
684         dqm_lock(dqm);
685         if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
686                 goto out;
687         if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
688                 qpd->evicted--;
689                 goto out;
690         }
691
692         pr_info_ratelimited("Restoring PASID %u queues\n",
693                             pdd->process->pasid);
694
695         /* Update PD Base in QPD */
696         qpd->page_table_base = pd_base;
697         pr_debug("Updated PD address to 0x%llx\n", pd_base);
698
699         if (!list_empty(&qpd->queues_list)) {
700                 dqm->dev->kfd2kgd->set_vm_context_page_table_base(
701                                 dqm->dev->kgd,
702                                 qpd->vmid,
703                                 qpd->page_table_base);
704                 kfd_flush_tlb(pdd);
705         }
706
707         /* Take a safe reference to the mm_struct, which may otherwise
708          * disappear even while the kfd_process is still referenced.
709          */
710         mm = get_task_mm(pdd->process->lead_thread);
711         if (!mm) {
712                 retval = -EFAULT;
713                 goto out;
714         }
715
716         /* Re-activate all queues that were evicted on the qpd */
717         list_for_each_entry(q, &qpd->queues_list, list) {
718                 if (!q->properties.is_evicted)
719                         continue;
720                 mqd_mgr = dqm->ops.get_mqd_manager(dqm,
721                         get_mqd_type_from_queue_type(q->properties.type));
722                 if (!mqd_mgr) { /* should not be here */
723                         pr_err("Cannot restore queue, mqd mgr is NULL\n");
724                         retval = -ENOMEM;
725                         goto out;
726                 }
727                 q->properties.is_evicted = false;
728                 q->properties.is_active = true;
729                 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
730                                        q->queue, &q->properties, mm);
731                 if (retval)
732                         goto out;
733                 dqm->queue_count++;
734         }
735         qpd->evicted = 0;
736 out:
737         if (mm)
738                 mmput(mm);
739         dqm_unlock(dqm);
740         return retval;
741 }
742
743 static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
744                                         struct qcm_process_device *qpd)
745 {
746         struct queue *q;
747         struct kfd_process_device *pdd;
748         uint64_t pd_base;
749         int retval = 0;
750
751         pdd = qpd_to_pdd(qpd);
752         /* Retrieve PD base */
753         pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
754
755         dqm_lock(dqm);
756         if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
757                 goto out;
758         if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
759                 qpd->evicted--;
760                 goto out;
761         }
762
763         pr_info_ratelimited("Restoring PASID %u queues\n",
764                             pdd->process->pasid);
765
766         /* Update PD Base in QPD */
767         qpd->page_table_base = pd_base;
768         pr_debug("Updated PD address to 0x%llx\n", pd_base);
769
770         /* Re-activate all queues that were evicted on the qpd */
771         list_for_each_entry(q, &qpd->queues_list, list) {
772                 if (!q->properties.is_evicted)
773                         continue;
774                 q->properties.is_evicted = false;
775                 q->properties.is_active = true;
776                 dqm->queue_count++;
777         }
778         retval = execute_queues_cpsch(dqm,
779                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
780         if (!retval)
781                 qpd->evicted = 0;
782 out:
783         dqm_unlock(dqm);
784         return retval;
785 }
786
787 static int register_process(struct device_queue_manager *dqm,
788                                         struct qcm_process_device *qpd)
789 {
790         struct device_process_node *n;
791         struct kfd_process_device *pdd;
792         uint64_t pd_base;
793         int retval;
794
795         n = kzalloc(sizeof(*n), GFP_KERNEL);
796         if (!n)
797                 return -ENOMEM;
798
799         n->qpd = qpd;
800
801         pdd = qpd_to_pdd(qpd);
802         /* Retrieve PD base */
803         pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
804
805         dqm_lock(dqm);
806         list_add(&n->list, &dqm->queues);
807
808         /* Update PD Base in QPD */
809         qpd->page_table_base = pd_base;
810         pr_debug("Updated PD address to 0x%llx\n", pd_base);
811
812         retval = dqm->asic_ops.update_qpd(dqm, qpd);
813
814         if (dqm->processes_count++ == 0)
815                 amdgpu_amdkfd_set_compute_idle(dqm->dev->kgd, false);
816
817         dqm_unlock(dqm);
818
819         return retval;
820 }
821
822 static int unregister_process(struct device_queue_manager *dqm,
823                                         struct qcm_process_device *qpd)
824 {
825         int retval;
826         struct device_process_node *cur, *next;
827
828         pr_debug("qpd->queues_list is %s\n",
829                         list_empty(&qpd->queues_list) ? "empty" : "not empty");
830
831         retval = 0;
832         dqm_lock(dqm);
833
834         list_for_each_entry_safe(cur, next, &dqm->queues, list) {
835                 if (qpd == cur->qpd) {
836                         list_del(&cur->list);
837                         kfree(cur);
838                         if (--dqm->processes_count == 0)
839                                 amdgpu_amdkfd_set_compute_idle(
840                                         dqm->dev->kgd, true);
841                         goto out;
842                 }
843         }
844         /* qpd not found in dqm list */
845         retval = 1;
846 out:
847         dqm_unlock(dqm);
848         return retval;
849 }
850
851 static int
852 set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
853                         unsigned int vmid)
854 {
855         return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
856                                                 dqm->dev->kgd, pasid, vmid);
857 }
858
859 static void init_interrupts(struct device_queue_manager *dqm)
860 {
861         unsigned int i;
862
863         for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
864                 if (is_pipe_enabled(dqm, 0, i))
865                         dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
866 }
867
868 static int initialize_nocpsch(struct device_queue_manager *dqm)
869 {
870         int pipe, queue;
871
872         pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
873
874         dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
875                                         sizeof(unsigned int), GFP_KERNEL);
876         if (!dqm->allocated_queues)
877                 return -ENOMEM;
878
879         mutex_init(&dqm->lock_hidden);
880         INIT_LIST_HEAD(&dqm->queues);
881         dqm->queue_count = dqm->next_pipe_to_allocate = 0;
882         dqm->sdma_queue_count = 0;
883
884         for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
885                 int pipe_offset = pipe * get_queues_per_pipe(dqm);
886
887                 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
888                         if (test_bit(pipe_offset + queue,
889                                      dqm->dev->shared_resources.queue_bitmap))
890                                 dqm->allocated_queues[pipe] |= 1 << queue;
891         }
892
893         dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
894         dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
895
896         return 0;
897 }
898
899 static void uninitialize(struct device_queue_manager *dqm)
900 {
901         int i;
902
903         WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
904
905         kfree(dqm->allocated_queues);
906         for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
907                 kfree(dqm->mqd_mgrs[i]);
908         mutex_destroy(&dqm->lock_hidden);
909         kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
910 }
911
912 static int start_nocpsch(struct device_queue_manager *dqm)
913 {
914         init_interrupts(dqm);
915         return pm_init(&dqm->packets, dqm);
916 }
917
918 static int stop_nocpsch(struct device_queue_manager *dqm)
919 {
920         pm_uninit(&dqm->packets);
921         return 0;
922 }
923
924 static int allocate_sdma_queue(struct device_queue_manager *dqm,
925                                 unsigned int *sdma_queue_id)
926 {
927         int bit;
928
929         if (dqm->sdma_bitmap == 0)
930                 return -ENOMEM;
931
932         bit = ffs(dqm->sdma_bitmap) - 1;
933         dqm->sdma_bitmap &= ~(1 << bit);
934         *sdma_queue_id = bit;
935
936         return 0;
937 }
938
939 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
940                                 unsigned int sdma_queue_id)
941 {
942         if (sdma_queue_id >= get_num_sdma_queues(dqm))
943                 return;
944         dqm->sdma_bitmap |= (1 << sdma_queue_id);
945 }
946
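/* Create an SDMA queue without the HW scheduler. The allocated sdma_id is
 * split across engines: engine = id % num_engines, per-engine queue =
 * id / num_engines. For example, assuming two SDMA engines, sdma_id 5
 * becomes engine 1, queue 2.
 */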
947 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
948                                         struct queue *q,
949                                         struct qcm_process_device *qpd)
950 {
951         struct mqd_manager *mqd_mgr;
952         int retval;
953
954         mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
955         if (!mqd_mgr)
956                 return -ENOMEM;
957
958         retval = allocate_sdma_queue(dqm, &q->sdma_id);
959         if (retval)
960                 return retval;
961
962         q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
963         q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);
964
965         retval = allocate_doorbell(qpd, q);
966         if (retval)
967                 goto out_deallocate_sdma_queue;
968
969         pr_debug("SDMA id is:    %d\n", q->sdma_id);
970         pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
971         pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
972
973         dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
974         retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
975                                 &q->gart_mqd_addr, &q->properties);
976         if (retval)
977                 goto out_deallocate_doorbell;
978
979         retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties,
980                                 NULL);
981         if (retval)
982                 goto out_uninit_mqd;
983
984         return 0;
985
986 out_uninit_mqd:
987         mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
988 out_deallocate_doorbell:
989         deallocate_doorbell(qpd, q);
990 out_deallocate_sdma_queue:
991         deallocate_sdma_queue(dqm, q->sdma_id);
992
993         return retval;
994 }
995
996 /*
997  * Device Queue Manager implementation for cp scheduler
998  */
999
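/* Report the compute resources owned by KFD to the HW scheduler: the compute
 * VMID mask and a 64-bit mask of the usable queues on the first MEC, built
 * from the shared queue bitmap.
 */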
1000 static int set_sched_resources(struct device_queue_manager *dqm)
1001 {
1002         int i, mec;
1003         struct scheduling_resources res;
1004
1005         res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
1006
1007         res.queue_mask = 0;
1008         for (i = 0; i < KGD_MAX_QUEUES; ++i) {
1009                 mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
1010                         / dqm->dev->shared_resources.num_pipe_per_mec;
1011
1012                 if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
1013                         continue;
1014
1015                 /* only acquire queues from the first MEC */
1016                 if (mec > 0)
1017                         continue;
1018
1019                 /* This situation may be hit in the future if a new HW
1020                  * generation exposes more than 64 queues. If so, the
1021                  * definition of res.queue_mask needs updating
1022                  */
1023                 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
1024                         pr_err("Invalid queue enabled by amdgpu: %d\n", i);
1025                         break;
1026                 }
1027
1028                 res.queue_mask |= (1ull << i);
1029         }
1030         res.gws_mask = res.oac_mask = res.gds_heap_base =
1031                                                 res.gds_heap_size = 0;
1032
1033         pr_debug("Scheduling resources:\n"
1034                         "vmid mask: 0x%8X\n"
1035                         "queue mask: 0x%8llX\n",
1036                         res.vmid_mask, res.queue_mask);
1037
1038         return pm_send_set_resources(&dqm->packets, &res);
1039 }
1040
1041 static int initialize_cpsch(struct device_queue_manager *dqm)
1042 {
1043         pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1044
1045         mutex_init(&dqm->lock_hidden);
1046         INIT_LIST_HEAD(&dqm->queues);
1047         dqm->queue_count = dqm->processes_count = 0;
1048         dqm->sdma_queue_count = 0;
1049         dqm->active_runlist = false;
1050         dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
1051
1052         INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
1053
1054         return 0;
1055 }
1056
1057 static int start_cpsch(struct device_queue_manager *dqm)
1058 {
1059         int retval;
1060
1061         retval = 0;
1062
1063         retval = pm_init(&dqm->packets, dqm);
1064         if (retval)
1065                 goto fail_packet_manager_init;
1066
1067         retval = set_sched_resources(dqm);
1068         if (retval)
1069                 goto fail_set_sched_resources;
1070
1071         pr_debug("Allocating fence memory\n");
1072
1073         /* allocate fence memory on the gart */
1074         retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
1075                                         &dqm->fence_mem);
1076
1077         if (retval)
1078                 goto fail_allocate_vidmem;
1079
1080         dqm->fence_addr = dqm->fence_mem->cpu_ptr;
1081         dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
1082
1083         init_interrupts(dqm);
1084
1085         dqm_lock(dqm);
1086         /* Clear the hang status when the driver tries to start the HW scheduler */
1087         dqm->is_hws_hang = false;
1088         execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1089         dqm_unlock(dqm);
1090
1091         return 0;
1092 fail_allocate_vidmem:
1093 fail_set_sched_resources:
1094         pm_uninit(&dqm->packets);
1095 fail_packet_manager_init:
1096         return retval;
1097 }
1098
1099 static int stop_cpsch(struct device_queue_manager *dqm)
1100 {
1101         dqm_lock(dqm);
1102         unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1103         dqm_unlock(dqm);
1104
1105         kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
1106         pm_uninit(&dqm->packets);
1107
1108         return 0;
1109 }
1110
1111 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
1112                                         struct kernel_queue *kq,
1113                                         struct qcm_process_device *qpd)
1114 {
1115         dqm_lock(dqm);
1116         if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1117                 pr_warn("Can't create new kernel queue because %d queues were already created\n",
1118                                 dqm->total_queue_count);
1119                 dqm_unlock(dqm);
1120                 return -EPERM;
1121         }
1122
1123         /*
1124          * Unconditionally increment this counter, regardless of the queue's
1125          * type or whether the queue is active.
1126          */
1127         dqm->total_queue_count++;
1128         pr_debug("Total of %d queues are accountable so far\n",
1129                         dqm->total_queue_count);
1130
1131         list_add(&kq->list, &qpd->priv_queue_list);
1132         dqm->queue_count++;
1133         qpd->is_debug = true;
1134         execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1135         dqm_unlock(dqm);
1136
1137         return 0;
1138 }
1139
1140 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
1141                                         struct kernel_queue *kq,
1142                                         struct qcm_process_device *qpd)
1143 {
1144         dqm_lock(dqm);
1145         list_del(&kq->list);
1146         dqm->queue_count--;
1147         qpd->is_debug = false;
1148         execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1149         /*
1150          * Unconditionally decrement this counter, regardless of the queue's
1151          * type.
1152          */
1153         dqm->total_queue_count--;
1154         pr_debug("Total of %d queues are accountable so far\n",
1155                         dqm->total_queue_count);
1156         dqm_unlock(dqm);
1157 }
1158
1159 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1160                         struct qcm_process_device *qpd)
1161 {
1162         int retval;
1163         struct mqd_manager *mqd_mgr;
1164
1165         retval = 0;
1166
1167         dqm_lock(dqm);
1168
1169         if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1170                 pr_warn("Can't create new usermode queue because %d queues were already created\n",
1171                                 dqm->total_queue_count);
1172                 retval = -EPERM;
1173                 goto out_unlock;
1174         }
1175
1176         if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1177                 retval = allocate_sdma_queue(dqm, &q->sdma_id);
1178                 if (retval)
1179                         goto out_unlock;
1180                 q->properties.sdma_queue_id =
1181                         q->sdma_id / get_num_sdma_engines(dqm);
1182                 q->properties.sdma_engine_id =
1183                         q->sdma_id % get_num_sdma_engines(dqm);
1184         }
1185
1186         retval = allocate_doorbell(qpd, q);
1187         if (retval)
1188                 goto out_deallocate_sdma_queue;
1189
1190         mqd_mgr = dqm->ops.get_mqd_manager(dqm,
1191                         get_mqd_type_from_queue_type(q->properties.type));
1192
1193         if (!mqd_mgr) {
1194                 retval = -ENOMEM;
1195                 goto out_deallocate_doorbell;
1196         }
1197         /*
1198          * Eviction state logic: we only mark active queues as evicted
1199          * to avoid the overhead of restoring inactive queues later
1200          */
1201         if (qpd->evicted)
1202                 q->properties.is_evicted = (q->properties.queue_size > 0 &&
1203                                             q->properties.queue_percent > 0 &&
1204                                             q->properties.queue_address != 0);
1205
1206         dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1207
1208         q->properties.tba_addr = qpd->tba_addr;
1209         q->properties.tma_addr = qpd->tma_addr;
1210         retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
1211                                 &q->gart_mqd_addr, &q->properties);
1212         if (retval)
1213                 goto out_deallocate_doorbell;
1214
1215         list_add(&q->list, &qpd->queues_list);
1216         qpd->queue_count++;
1217         if (q->properties.is_active) {
1218                 dqm->queue_count++;
1219                 retval = execute_queues_cpsch(dqm,
1220                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1221         }
1222
1223         if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
1224                 dqm->sdma_queue_count++;
1225         /*
1226          * Unconditionally increment this counter, regardless of the queue's
1227          * type or whether the queue is active.
1228          */
1229         dqm->total_queue_count++;
1230
1231         pr_debug("Total of %d queues are accountable so far\n",
1232                         dqm->total_queue_count);
1233
1234         dqm_unlock(dqm);
1235         return retval;
1236
1237 out_deallocate_doorbell:
1238         deallocate_doorbell(qpd, q);
1239 out_deallocate_sdma_queue:
1240         if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
1241                 deallocate_sdma_queue(dqm, q->sdma_id);
1242 out_unlock:
1243         dqm_unlock(dqm);
1244
1245         return retval;
1246 }
1247
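/* Poll the fence location until it reaches fence_value or the timeout
 * expires (-ETIME). With halt_if_hws_hang set, the thread is parked in
 * schedule() on timeout so CP state is preserved for FW debugging.
 */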
1248 int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
1249                                 unsigned int fence_value,
1250                                 unsigned int timeout_ms)
1251 {
1252         unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
1253
1254         while (*fence_addr != fence_value) {
1255                 if (time_after(jiffies, end_jiffies)) {
1256                         pr_err("qcm fence wait loop timeout expired\n");
1257                         /* In the HWS case, this is used to halt the driver thread
1258                          * in order not to mess up CP states before doing
1259                          * scandumps for FW debugging.
1260                          */
1261                         while (halt_if_hws_hang)
1262                                 schedule();
1263
1264                         return -ETIME;
1265                 }
1266                 schedule();
1267         }
1268
1269         return 0;
1270 }
1271
1272 static int unmap_sdma_queues(struct device_queue_manager *dqm,
1273                                 unsigned int sdma_engine)
1274 {
1275         return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
1276                         KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
1277                         sdma_engine);
1278 }
1279
1280 /* dqm->lock mutex has to be locked before calling this function */
1281 static int map_queues_cpsch(struct device_queue_manager *dqm)
1282 {
1283         int retval;
1284
1285         if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
1286                 return 0;
1287
1288         if (dqm->active_runlist)
1289                 return 0;
1290
1291         retval = pm_send_runlist(&dqm->packets, &dqm->queues);
1292         if (retval) {
1293                 pr_err("failed to execute runlist\n");
1294                 return retval;
1295         }
1296         dqm->active_runlist = true;
1297
1298         return retval;
1299 }
1300
1301 /* dqm->lock mutex has to be locked before calling this function */
1302 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
1303                                 enum kfd_unmap_queues_filter filter,
1304                                 uint32_t filter_param)
1305 {
1306         int retval = 0;
1307
1308         if (dqm->is_hws_hang)
1309                 return -EIO;
1310         if (!dqm->active_runlist)
1311                 return retval;
1312
1313         pr_debug("Before destroying queues, sdma queue count is : %u\n",
1314                 dqm->sdma_queue_count);
1315
1316         if (dqm->sdma_queue_count > 0) {
1317                 unmap_sdma_queues(dqm, 0);
1318                 unmap_sdma_queues(dqm, 1);
1319         }
1320
1321         retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
1322                         filter, filter_param, false, 0);
1323         if (retval)
1324                 return retval;
1325
1326         *dqm->fence_addr = KFD_FENCE_INIT;
1327         pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
1328                                 KFD_FENCE_COMPLETED);
1329         /* Wait for the unmap to complete; a timeout here means preemption failed */
1330         retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
1331                                 QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
1332         if (retval)
1333                 return retval;
1334
1335         pm_release_ib(&dqm->packets);
1336         dqm->active_runlist = false;
1337
1338         return retval;
1339 }
1340
1341 /* dqm->lock mutex has to be locked before calling this function */
1342 static int execute_queues_cpsch(struct device_queue_manager *dqm,
1343                                 enum kfd_unmap_queues_filter filter,
1344                                 uint32_t filter_param)
1345 {
1346         int retval;
1347
1348         if (dqm->is_hws_hang)
1349                 return -EIO;
1350         retval = unmap_queues_cpsch(dqm, filter, filter_param);
1351         if (retval) {
1352                 pr_err("The CP might be in an unrecoverable state due to an unsuccessful queue preemption\n");
1353                 dqm->is_hws_hang = true;
1354                 schedule_work(&dqm->hw_exception_work);
1355                 return retval;
1356         }
1357
1358         return map_queues_cpsch(dqm);
1359 }
1360
1361 static int destroy_queue_cpsch(struct device_queue_manager *dqm,
1362                                 struct qcm_process_device *qpd,
1363                                 struct queue *q)
1364 {
1365         int retval;
1366         struct mqd_manager *mqd_mgr;
1367
1368         retval = 0;
1369
1370         /* remove queue from list to prevent rescheduling after preemption */
1371         dqm_lock(dqm);
1372
1373         if (qpd->is_debug) {
1374                 /*
1375                  * Error: we currently do not allow destroying a queue
1376                  * of a process that is being debugged
1377                  */
1378                 retval = -EBUSY;
1379                 goto failed_try_destroy_debugged_queue;
1380
1381         }
1382
1383         mqd_mgr = dqm->ops.get_mqd_manager(dqm,
1384                         get_mqd_type_from_queue_type(q->properties.type));
1385         if (!mqd_mgr) {
1386                 retval = -ENOMEM;
1387                 goto failed;
1388         }
1389
1390         deallocate_doorbell(qpd, q);
1391
1392         if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1393                 dqm->sdma_queue_count--;
1394                 deallocate_sdma_queue(dqm, q->sdma_id);
1395         }
1396
1397         list_del(&q->list);
1398         qpd->queue_count--;
1399         if (q->properties.is_active) {
1400                 dqm->queue_count--;
1401                 retval = execute_queues_cpsch(dqm,
1402                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1403                 if (retval == -ETIME)
1404                         qpd->reset_wavefronts = true;
1405         }
1406
1407         mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1408
1409         /*
1410          * Unconditionally decrement this counter, regardless of the queue's
1411          * type
1412          */
1413         dqm->total_queue_count--;
1414         pr_debug("Total of %d queues are accountable so far\n",
1415                         dqm->total_queue_count);
1416
1417         dqm_unlock(dqm);
1418
1419         return retval;
1420
1421 failed:
1422 failed_try_destroy_debugged_queue:
1423
1424         dqm_unlock(dqm);
1425         return retval;
1426 }
1427
1428 /*
1429  * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
1430  * stay in user mode.
1431  */
1432 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
1433 /* APE1 limit is inclusive and 64K aligned. */
1434 #define APE1_LIMIT_ALIGNMENT 0xFFFF
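/* Worked example of the conversion in set_cache_memory_policy() below: a
 * 64KB aperture at base 0x100000000 has limit 0x10000FFFF, which satisfies
 * both masks above and is programmed as sh_mem_ape1_base =
 * sh_mem_ape1_limit = 0x10000 (in 64K units).
 */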
1435
1436 static bool set_cache_memory_policy(struct device_queue_manager *dqm,
1437                                    struct qcm_process_device *qpd,
1438                                    enum cache_policy default_policy,
1439                                    enum cache_policy alternate_policy,
1440                                    void __user *alternate_aperture_base,
1441                                    uint64_t alternate_aperture_size)
1442 {
1443         bool retval = true;
1444
1445         if (!dqm->asic_ops.set_cache_memory_policy)
1446                 return retval;
1447
1448         dqm_lock(dqm);
1449
1450         if (alternate_aperture_size == 0) {
1451                 /* base > limit disables APE1 */
1452                 qpd->sh_mem_ape1_base = 1;
1453                 qpd->sh_mem_ape1_limit = 0;
1454         } else {
1455                 /*
1456                  * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
1457                  *                      SH_MEM_APE1_BASE[31:0], 0x0000 }
1458                  * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
1459                  *                      SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
1460                  * Verify that the base and size parameters can be
1461                  * represented in this format and convert them.
1462                  * Additionally restrict APE1 to user-mode addresses.
1463                  */
1464
1465                 uint64_t base = (uintptr_t)alternate_aperture_base;
1466                 uint64_t limit = base + alternate_aperture_size - 1;
1467
1468                 if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
1469                    (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
1470                         retval = false;
1471                         goto out;
1472                 }
1473
1474                 qpd->sh_mem_ape1_base = base >> 16;
1475                 qpd->sh_mem_ape1_limit = limit >> 16;
1476         }
1477
1478         retval = dqm->asic_ops.set_cache_memory_policy(
1479                         dqm,
1480                         qpd,
1481                         default_policy,
1482                         alternate_policy,
1483                         alternate_aperture_base,
1484                         alternate_aperture_size);
1485
1486         if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
1487                 program_sh_mem_settings(dqm, qpd);
1488
1489         pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
1490                 qpd->sh_mem_config, qpd->sh_mem_ape1_base,
1491                 qpd->sh_mem_ape1_limit);
1492
1493 out:
1494         dqm_unlock(dqm);
1495         return retval;
1496 }
1497
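/*
 * Register a second-level (user) trap handler.  With CWSR enabled, the user
 * TBA/TMA addresses are written into the CWSR trap memory area so the
 * first-level trap handler can jump to them; otherwise they are simply
 * stored in the qpd.
 */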
1498 static int set_trap_handler(struct device_queue_manager *dqm,
1499                                 struct qcm_process_device *qpd,
1500                                 uint64_t tba_addr,
1501                                 uint64_t tma_addr)
1502 {
1503         uint64_t *tma;
1504
1505         if (dqm->dev->cwsr_enabled) {
1506                 /* Jump from CWSR trap handler to user trap */
1507                 tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
1508                 tma[0] = tba_addr;
1509                 tma[1] = tma_addr;
1510         } else {
1511                 qpd->tba_addr = tba_addr;
1512                 qpd->tma_addr = tma_addr;
1513         }
1514
1515         return 0;
1516 }
1517
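/*
 * Tear down a terminating process under non-HWS scheduling: destroy every
 * remaining user-mode queue and drop the process from the DQM's process list.
 */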
1518 static int process_termination_nocpsch(struct device_queue_manager *dqm,
1519                 struct qcm_process_device *qpd)
1520 {
1521         struct queue *q, *next;
1522         struct device_process_node *cur, *next_dpn;
1523         int retval = 0;
1524
1525         dqm_lock(dqm);
1526
1527         /* Clear all user mode queues */
1528         list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1529                 int ret;
1530
1531                 ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
1532                 if (ret)
1533                         retval = ret;
1534         }
1535
1536         /* Unregister process */
1537         list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1538                 if (qpd == cur->qpd) {
1539                         list_del(&cur->list);
1540                         kfree(cur);
1541                         dqm->processes_count--;
1542                         break;
1543                 }
1544         }
1545
1546         dqm_unlock(dqm);
1547         return retval;
1548 }
1549
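/*
 * Copy the control stack / wave state of a queue to a user buffer via the
 * MQD manager.  Only valid for compute queues that are not active on a
 * device with CWSR enabled.
 */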
1550 static int get_wave_state(struct device_queue_manager *dqm,
1551                           struct queue *q,
1552                           void __user *ctl_stack,
1553                           u32 *ctl_stack_used_size,
1554                           u32 *save_area_used_size)
1555 {
1556         struct mqd_manager *mqd_mgr;
1557         int r;
1558
1559         dqm_lock(dqm);
1560
1561         if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
1562             q->properties.is_active || !q->device->cwsr_enabled) {
1563                 r = -EINVAL;
1564                 goto dqm_unlock;
1565         }
1566
1567         mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
1568         if (!mqd_mgr) {
1569                 r = -ENOMEM;
1570                 goto dqm_unlock;
1571         }
1572
1573         if (!mqd_mgr->get_wave_state) {
1574                 r = -EINVAL;
1575                 goto dqm_unlock;
1576         }
1577
1578         r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
1579                         ctl_stack_used_size, save_area_used_size);
1580
1581 dqm_unlock:
1582         dqm_unlock(dqm);
1583         return r;
1584 }
1585
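/*
 * Tear down a terminating process under HWS scheduling: remove its kernel
 * and user-mode queues from the bookkeeping, update the runlist, reset
 * wavefronts if required, and finally free the MQDs.
 */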
1586 static int process_termination_cpsch(struct device_queue_manager *dqm,
1587                 struct qcm_process_device *qpd)
1588 {
1589         int retval;
1590         struct queue *q, *next;
1591         struct kernel_queue *kq, *kq_next;
1592         struct mqd_manager *mqd_mgr;
1593         struct device_process_node *cur, *next_dpn;
1594         enum kfd_unmap_queues_filter filter =
1595                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
1596
1597         retval = 0;
1598
1599         dqm_lock(dqm);
1600
1601         /* Clean up all kernel queues */
1602         list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
1603                 list_del(&kq->list);
1604                 dqm->queue_count--;
1605                 qpd->is_debug = false;
1606                 dqm->total_queue_count--;
1607                 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
1608         }
1609
1610         /* Clear all user mode queues */
1611         list_for_each_entry(q, &qpd->queues_list, list) {
1612                 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1613                         dqm->sdma_queue_count--;
1614                         deallocate_sdma_queue(dqm, q->sdma_id);
1615                 }
1616
1617                 if (q->properties.is_active)
1618                         dqm->queue_count--;
1619
1620                 dqm->total_queue_count--;
1621         }
1622
1623         /* Unregister process */
1624         list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1625                 if (qpd == cur->qpd) {
1626                         list_del(&cur->list);
1627                         kfree(cur);
1628                         dqm->processes_count--;
1629                         break;
1630                 }
1631         }
1632
1633         retval = execute_queues_cpsch(dqm, filter, 0);
1634         if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
1635                 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
1636                 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
1637                 qpd->reset_wavefronts = false;
1638         }
1639
1640         /* lastly, free mqd resources */
1641         list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1642                 mqd_mgr = dqm->ops.get_mqd_manager(dqm,
1643                         get_mqd_type_from_queue_type(q->properties.type));
1644                 if (!mqd_mgr) {
1645                         retval = -ENOMEM;
1646                         goto out;
1647                 }
1648                 list_del(&q->list);
1649                 qpd->queue_count--;
1650                 mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1651         }
1652
1653 out:
1654         dqm_unlock(dqm);
1655         return retval;
1656 }
1657
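/*
 * Allocate and set up a device queue manager for @dev: pick the scheduling
 * policy (HWS is forced off on Hawaii and Tonga), wire up the matching ops
 * table, hook in the ASIC-specific callbacks and run the initialize op.
 */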
1658 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1659 {
1660         struct device_queue_manager *dqm;
1661
1662         pr_debug("Loading device queue manager\n");
1663
1664         dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
1665         if (!dqm)
1666                 return NULL;
1667
1668         switch (dev->device_info->asic_family) {
1669         /* HWS is not available on Hawaii. */
1670         case CHIP_HAWAII:
1671         /* HWS depends on CWSR for timely dequeue. CWSR is not
1672          * available on Tonga.
1673          *
1674          * FIXME: This argument also applies to Kaveri.
1675          */
1676         case CHIP_TONGA:
1677                 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
1678                 break;
1679         default:
1680                 dqm->sched_policy = sched_policy;
1681                 break;
1682         }
1683
1684         dqm->dev = dev;
1685         switch (dqm->sched_policy) {
1686         case KFD_SCHED_POLICY_HWS:
1687         case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
1688                 /* initialize dqm for cp scheduling */
1689                 dqm->ops.create_queue = create_queue_cpsch;
1690                 dqm->ops.initialize = initialize_cpsch;
1691                 dqm->ops.start = start_cpsch;
1692                 dqm->ops.stop = stop_cpsch;
1693                 dqm->ops.destroy_queue = destroy_queue_cpsch;
1694                 dqm->ops.update_queue = update_queue;
1695                 dqm->ops.get_mqd_manager = get_mqd_manager;
1696                 dqm->ops.register_process = register_process;
1697                 dqm->ops.unregister_process = unregister_process;
1698                 dqm->ops.uninitialize = uninitialize;
1699                 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
1700                 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
1701                 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1702                 dqm->ops.set_trap_handler = set_trap_handler;
1703                 dqm->ops.process_termination = process_termination_cpsch;
1704                 dqm->ops.evict_process_queues = evict_process_queues_cpsch;
1705                 dqm->ops.restore_process_queues = restore_process_queues_cpsch;
1706                 dqm->ops.get_wave_state = get_wave_state;
1707                 break;
1708         case KFD_SCHED_POLICY_NO_HWS:
1709                 /* initialize dqm for no cp scheduling */
1710                 dqm->ops.start = start_nocpsch;
1711                 dqm->ops.stop = stop_nocpsch;
1712                 dqm->ops.create_queue = create_queue_nocpsch;
1713                 dqm->ops.destroy_queue = destroy_queue_nocpsch;
1714                 dqm->ops.update_queue = update_queue;
1715                 dqm->ops.get_mqd_manager = get_mqd_manager;
1716                 dqm->ops.register_process = register_process;
1717                 dqm->ops.unregister_process = unregister_process;
1718                 dqm->ops.initialize = initialize_nocpsch;
1719                 dqm->ops.uninitialize = uninitialize;
1720                 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1721                 dqm->ops.set_trap_handler = set_trap_handler;
1722                 dqm->ops.process_termination = process_termination_nocpsch;
1723                 dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
1724                 dqm->ops.restore_process_queues =
1725                         restore_process_queues_nocpsch;
1726                 dqm->ops.get_wave_state = get_wave_state;
1727                 break;
1728         default:
1729                 pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
1730                 goto out_free;
1731         }
1732
1733         switch (dev->device_info->asic_family) {
1734         case CHIP_CARRIZO:
1735                 device_queue_manager_init_vi(&dqm->asic_ops);
1736                 break;
1737
1738         case CHIP_KAVERI:
1739                 device_queue_manager_init_cik(&dqm->asic_ops);
1740                 break;
1741
1742         case CHIP_HAWAII:
1743                 device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
1744                 break;
1745
1746         case CHIP_TONGA:
1747         case CHIP_FIJI:
1748         case CHIP_POLARIS10:
1749         case CHIP_POLARIS11:
1750         case CHIP_POLARIS12:
1751                 device_queue_manager_init_vi_tonga(&dqm->asic_ops);
1752                 break;
1753
1754         case CHIP_VEGA10:
1755         case CHIP_VEGA12:
1756         case CHIP_VEGA20:
1757         case CHIP_RAVEN:
1758                 device_queue_manager_init_v9(&dqm->asic_ops);
1759                 break;
1760         default:
1761                 WARN(1, "Unexpected ASIC family %u",
1762                      dev->device_info->asic_family);
1763                 goto out_free;
1764         }
1765
1766         if (!dqm->ops.initialize(dqm))
1767                 return dqm;
1768
1769 out_free:
1770         kfree(dqm);
1771         return NULL;
1772 }
1773
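/* Counterpart of device_queue_manager_init(): release the DQM's resources. */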
1774 void device_queue_manager_uninit(struct device_queue_manager *dqm)
1775 {
1776         dqm->ops.uninitialize(dqm);
1777         kfree(dqm);
1778 }
1779
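/*
 * Handle a VM fault reported for @pasid: look up the owning process and
 * evict its queues on this device, then drop the process reference.
 */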
1780 int kfd_process_vm_fault(struct device_queue_manager *dqm,
1781                          unsigned int pasid)
1782 {
1783         struct kfd_process_device *pdd;
1784         struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
1785         int ret = 0;
1786
1787         if (!p)
1788                 return -EINVAL;
1789         pdd = kfd_get_process_device_data(dqm->dev, p);
1790         if (pdd)
1791                 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
1792         kfd_unref_process(p);
1793
1794         return ret;
1795 }
1796
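/* Deferred work run on a hardware exception: trigger a full GPU reset. */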
1797 static void kfd_process_hw_exception(struct work_struct *work)
1798 {
1799         struct device_queue_manager *dqm = container_of(work,
1800                         struct device_queue_manager, hw_exception_work);
1801         amdgpu_amdkfd_gpu_reset(dqm->dev->kgd);
1802 }
1803
1804 #if defined(CONFIG_DEBUG_FS)
1805
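/*
 * Pretty-print a register dump: each line starts at a discontinuity in the
 * register offsets and holds at most eight offset/value pairs.
 */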
1806 static void seq_reg_dump(struct seq_file *m,
1807                          uint32_t (*dump)[2], uint32_t n_regs)
1808 {
1809         uint32_t i, count;
1810
1811         for (i = 0, count = 0; i < n_regs; i++) {
1812                 if (count == 0 ||
1813                     dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
1814                         seq_printf(m, "%s    %08x: %08x",
1815                                    i ? "\n" : "",
1816                                    dump[i][0], dump[i][1]);
1817                         count = 7;
1818                 } else {
1819                         seq_printf(m, " %08x", dump[i][1]);
1820                         count--;
1821                 }
1822         }
1823
1824         seq_puts(m, "\n");
1825 }
1826
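/*
 * debugfs dump of hardware queue descriptors: the HIQ, every allocated CP
 * queue on every pipe, and every SDMA RLC queue on every engine.
 */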
1827 int dqm_debugfs_hqds(struct seq_file *m, void *data)
1828 {
1829         struct device_queue_manager *dqm = data;
1830         uint32_t (*dump)[2], n_regs;
1831         int pipe, queue;
1832         int r = 0;
1833
1834         r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
1835                 KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs);
1836         if (!r) {
1837                 seq_printf(m, "  HIQ on MEC %d Pipe %d Queue %d\n",
1838                                 KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
1839                                 KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
1840                                 KFD_CIK_HIQ_QUEUE);
1841                 seq_reg_dump(m, dump, n_regs);
1842
1843                 kfree(dump);
1844         }
1845
1846         for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
1847                 int pipe_offset = pipe * get_queues_per_pipe(dqm);
1848
1849                 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
1850                         if (!test_bit(pipe_offset + queue,
1851                                       dqm->dev->shared_resources.queue_bitmap))
1852                                 continue;
1853
1854                         r = dqm->dev->kfd2kgd->hqd_dump(
1855                                 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
1856                         if (r)
1857                                 break;
1858
1859                         seq_printf(m, "  CP Pipe %d, Queue %d\n",
1860                                   pipe, queue);
1861                         seq_reg_dump(m, dump, n_regs);
1862
1863                         kfree(dump);
1864                 }
1865         }
1866
1867         for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) {
1868                 for (queue = 0;
1869                      queue < dqm->dev->device_info->num_sdma_queues_per_engine;
1870                      queue++) {
1871                         r = dqm->dev->kfd2kgd->hqd_sdma_dump(
1872                                 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
1873                         if (r)
1874                                 break;
1875
1876                         seq_printf(m, "  SDMA Engine %d, RLC %d\n",
1877                                   pipe, queue);
1878                         seq_reg_dump(m, dump, n_regs);
1879
1880                         kfree(dump);
1881                 }
1882         }
1883
1884         return r;
1885 }
1886
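/*
 * debugfs hook that forces a full unmap and remap of the runlist; marking
 * the runlist active first ensures the unmap step is not skipped.
 */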
1887 int dqm_debugfs_execute_queues(struct device_queue_manager *dqm)
1888 {
1889         int r = 0;
1890
1891         dqm_lock(dqm);
1892         dqm->active_runlist = true;
1893         r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1894         dqm_unlock(dqm);
1895
1896         return r;
1897 }
1898
1899 #endif