Merge tag 'dma-mapping-5.15' of git://git.infradead.org/users/hch/dma-mapping
[linux-2.6-microblaze.git] / drivers / gpu / drm / amd / amdgpu / mes_v10_1.c
1 /*
2  * Copyright 2019 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include "amdgpu.h"
27 #include "soc15_common.h"
28 #include "nv.h"
29 #include "gc/gc_10_1_0_offset.h"
30 #include "gc/gc_10_1_0_sh_mask.h"
31 #include "v10_structs.h"
32 #include "mes_api_def.h"
33
34 #define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid               0x2820
35 #define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid_BASE_IDX      1
36
37 MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
38 MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");
39
40 static int mes_v10_1_hw_fini(void *handle);
41
42 #define MES_EOP_SIZE   2048
43
44 static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring)
45 {
46         struct amdgpu_device *adev = ring->adev;
47
48         if (ring->use_doorbell) {
49                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs],
50                              ring->wptr);
51                 WDOORBELL64(ring->doorbell_index, ring->wptr);
52         } else {
53                 BUG();
54         }
55 }
56
57 static u64 mes_v10_1_ring_get_rptr(struct amdgpu_ring *ring)
58 {
59         return ring->adev->wb.wb[ring->rptr_offs];
60 }
61
62 static u64 mes_v10_1_ring_get_wptr(struct amdgpu_ring *ring)
63 {
64         u64 wptr;
65
66         if (ring->use_doorbell)
67                 wptr = atomic64_read((atomic64_t *)
68                                      &ring->adev->wb.wb[ring->wptr_offs]);
69         else
70                 BUG();
71         return wptr;
72 }
73
/* Ring callbacks for the MES control ring.  All pointer updates go through
 * the doorbell path (the get/set helpers above BUG() otherwise).
 */
static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = {
	.type = AMDGPU_RING_TYPE_MES,
	.align_mask = 1,
	.nop = 0,
	.support_64bit_ptrs = true,
	.get_rptr = mes_v10_1_ring_get_rptr,
	.get_wptr = mes_v10_1_ring_get_wptr,
	.set_wptr = mes_v10_1_ring_set_wptr,
	.insert_nop = amdgpu_ring_insert_nop,
};
84
/*
 * Write one MES API packet to the MES ring and busy-wait for the firmware
 * to signal the completion fence.
 *
 * @pkt: fully built API packet; callers set api_status.api_completion_fence_*
 *       from mes->ring.fence_drv before submitting.
 * @size: packet size in bytes; must be a multiple of 4 (ring dwords).
 *
 * Returns 0 on success, -ENOMEM if ring space cannot be reserved, or
 * -ETIMEDOUT if the fence does not signal within adev->usec_timeout.
 */
static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
						    void *pkt, int size)
{
	int ndw = size / 4;
	signed long r;
	/* every MES API union starts with the same header; use ADD_QUEUE's
	 * layout just to reach pkt->header.opcode for logging
	 */
	union MESAPI__ADD_QUEUE *x_pkt = pkt;
	struct amdgpu_device *adev = mes->adev;
	struct amdgpu_ring *ring = &mes->ring;

	BUG_ON(size % 4 != 0);

	if (amdgpu_ring_alloc(ring, ndw))
		return -ENOMEM;

	amdgpu_ring_write_multiple(ring, pkt, ndw);
	amdgpu_ring_commit(ring);

	DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);

	/* polled (not interrupt driven): MES writes the fence value back */
	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
				      adev->usec_timeout);
	if (r < 1) {
		DRM_ERROR("MES failed to response msg=%d\n",
			  x_pkt->header.opcode);
		return -ETIMEDOUT;
	}

	return 0;
}
114
115 static int convert_to_mes_queue_type(int queue_type)
116 {
117         if (queue_type == AMDGPU_RING_TYPE_GFX)
118                 return MES_QUEUE_TYPE_GFX;
119         else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
120                 return MES_QUEUE_TYPE_COMPUTE;
121         else if (queue_type == AMDGPU_RING_TYPE_SDMA)
122                 return MES_QUEUE_TYPE_SDMA;
123         else
124                 BUG();
125         return -1;
126 }
127
/*
 * Ask the MES firmware to add (map) a hardware queue.
 *
 * Builds a MESAPI__ADD_QUEUE packet from @input, wires up the completion
 * fence, and submits it synchronously.  Returns 0 or the error from
 * mes_v10_1_submit_pkt_and_poll_completion().
 */
static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
				  struct mes_add_queue_input *input)
{
	struct amdgpu_device *adev = mes->adev;
	union MESAPI__ADD_QUEUE mes_add_queue_pkt;

	memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));

	mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
	mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_add_queue_pkt.process_id = input->process_id;
	/* firmware wants the page table base as an offset from VRAM start */
	mes_add_queue_pkt.page_table_base_addr =
		input->page_table_base_addr - adev->gmc.vram_start;
	mes_add_queue_pkt.process_va_start = input->process_va_start;
	mes_add_queue_pkt.process_va_end = input->process_va_end;
	mes_add_queue_pkt.process_quantum = input->process_quantum;
	mes_add_queue_pkt.process_context_addr = input->process_context_addr;
	mes_add_queue_pkt.gang_quantum = input->gang_quantum;
	mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
	mes_add_queue_pkt.inprocess_gang_priority =
		input->inprocess_gang_priority;
	mes_add_queue_pkt.gang_global_priority_level =
		input->gang_global_priority_level;
	mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
	mes_add_queue_pkt.mqd_addr = input->mqd_addr;
	mes_add_queue_pkt.wptr_addr = input->wptr_addr;
	mes_add_queue_pkt.queue_type =
		convert_to_mes_queue_type(input->queue_type);
	mes_add_queue_pkt.paging = input->paging;

	/* fence slot the firmware writes to signal completion */
	mes_add_queue_pkt.api_status.api_completion_fence_addr =
		mes->ring.fence_drv.gpu_addr;
	mes_add_queue_pkt.api_status.api_completion_fence_value =
		++mes->ring.fence_drv.sync_seq;

	return mes_v10_1_submit_pkt_and_poll_completion(mes,
			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt));
}
168
/*
 * Ask the MES firmware to remove (unmap) a hardware queue, identified by
 * its doorbell offset and gang context address.  Synchronous; returns 0 or
 * the error from mes_v10_1_submit_pkt_and_poll_completion().
 */
static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
				     struct mes_remove_queue_input *input)
{
	union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;

	memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));

	mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
	mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
	mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;

	/* fence slot the firmware writes to signal completion */
	mes_remove_queue_pkt.api_status.api_completion_fence_addr =
		mes->ring.fence_drv.gpu_addr;
	mes_remove_queue_pkt.api_status.api_completion_fence_value =
		++mes->ring.fence_drv.sync_seq;

	return mes_v10_1_submit_pkt_and_poll_completion(mes,
			&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
}
191
/* Gang suspend is not implemented for MES v10.1; reports success so the
 * generic MES code can proceed.  TODO confirm callers tolerate the no-op.
 */
static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
				  struct mes_suspend_gang_input *input)
{
	return 0;
}
197
/* Gang resume is not implemented for MES v10.1; reports success so the
 * generic MES code can proceed.  TODO confirm callers tolerate the no-op.
 */
static int mes_v10_1_resume_gang(struct amdgpu_mes *mes,
				 struct mes_resume_gang_input *input)
{
	return 0;
}
203
/*
 * Query the MES scheduler status.  Mainly useful as a liveness check:
 * success means the firmware answered within the timeout.
 */
static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes)
{
	union MESAPI__QUERY_MES_STATUS mes_status_pkt;

	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));

	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	/* fence slot the firmware writes to signal completion */
	mes_status_pkt.api_status.api_completion_fence_addr =
		mes->ring.fence_drv.gpu_addr;
	mes_status_pkt.api_status.api_completion_fence_value =
		++mes->ring.fence_drv.sync_seq;

	return mes_v10_1_submit_pkt_and_poll_completion(mes,
			&mes_status_pkt, sizeof(mes_status_pkt));
}
222
/*
 * Hand the MES firmware its view of the hardware resources it may schedule:
 * VMID masks for both hubs, the HQD masks per compute/gfx/sdma pipe, the
 * aggregated doorbells per priority level, and the scheduler context /
 * query-status fence GPU addresses allocated earlier.
 *
 * Synchronous; returns 0 or the submit/poll error.
 */
static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
{
	int i;
	struct amdgpu_device *adev = mes->adev;
	union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;

	memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));

	mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
	mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
	mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
	mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
	mes_set_hw_res_pkt.paging_vmid = 0;
	mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
	mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
		mes->query_status_fence_gpu_addr;

	for (i = 0; i < MAX_COMPUTE_PIPES; i++)
		mes_set_hw_res_pkt.compute_hqd_mask[i] =
			mes->compute_hqd_mask[i];

	for (i = 0; i < MAX_GFX_PIPES; i++)
		mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];

	for (i = 0; i < MAX_SDMA_PIPES; i++)
		mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];

	/* "agreegated" spelling comes from the firmware API definition */
	for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
		mes_set_hw_res_pkt.agreegated_doorbells[i] =
			mes->agreegated_doorbells[i];

	/* fence slot the firmware writes to signal completion */
	mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
		mes->ring.fence_drv.gpu_addr;
	mes_set_hw_res_pkt.api_status.api_completion_fence_value =
		++mes->ring.fence_drv.sync_seq;

	return mes_v10_1_submit_pkt_and_poll_completion(mes,
			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt));
}
265
/* MES scheduler operations exposed to the generic amdgpu MES layer. */
static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
	.add_hw_queue = mes_v10_1_add_hw_queue,
	.remove_hw_queue = mes_v10_1_remove_hw_queue,
	.suspend_gang = mes_v10_1_suspend_gang,
	.resume_gang = mes_v10_1_resume_gang,
};
272
/*
 * Fetch and validate the per-ASIC MES firmware image, cache version and
 * start-address fields from its header, and (for PSP front-door loading)
 * register the ucode and data sections with the firmware framework.
 *
 * Returns 0 on success or the request_firmware()/amdgpu_ucode_validate()
 * error; on validation failure the firmware reference is released again.
 */
static int mes_v10_1_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	struct amdgpu_firmware_info *info;

	switch (adev->asic_type) {
	case CHIP_NAVI10:
		chip_name = "navi10";
		break;
	case CHIP_SIENNA_CICHLID:
		chip_name = "sienna_cichlid";
		break;
	default:
		/* ASICs without MES support must never reach this function */
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin", chip_name);
	err = request_firmware(&adev->mes.fw, fw_name, adev->dev);
	if (err)
		return err;

	err = amdgpu_ucode_validate(adev->mes.fw);
	if (err) {
		release_firmware(adev->mes.fw);
		adev->mes.fw = NULL;
		return err;
	}

	mes_hdr = (const struct mes_firmware_header_v1_0 *)adev->mes.fw->data;
	/* NOTE(review): the second store overwrites the first, so
	 * ucode_fw_version ends up holding mes_ucode_data_version and the
	 * instruction-ucode version read is dead.  The data version was
	 * presumably meant for a separate field — confirm against
	 * struct amdgpu_mes and fix if so.
	 */
	adev->mes.ucode_fw_version = le32_to_cpu(mes_hdr->mes_ucode_version);
	adev->mes.ucode_fw_version =
		le32_to_cpu(mes_hdr->mes_ucode_data_version);
	/* 64-bit start addresses are split lo/hi in the header */
	adev->mes.uc_start_addr =
		le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
		((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
	adev->mes.data_start_addr =
		le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
		((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		/* front-door load: PSP needs both sections in the ucode list */
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MES;
		info->fw = adev->mes.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
			      PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES_DATA];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MES_DATA;
		info->fw = adev->mes.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
			      PAGE_SIZE);
	}

	return 0;
}
333
/* Drop the firmware reference taken in mes_v10_1_init_microcode(). */
static void mes_v10_1_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->mes.fw);
	adev->mes.fw = NULL;	/* guard against stale use / double release */
}
339
/*
 * Copy the MES instruction ucode section out of the firmware image into a
 * freshly created GTT buffer object, for the backdoor (register) load path.
 * On success adev->mes.ucode_fw_{obj,gpu_addr,ptr} are populated.
 */
static int mes_v10_1_allocate_ucode_buffer(struct amdgpu_device *adev)
{
	int r;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw->data;

	/* section offset/size come from the validated firmware header */
	fw_data = (const __le32 *)(adev->mes.fw->data +
		   le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->mes.ucode_fw_obj,
				      &adev->mes.ucode_fw_gpu_addr,
				      (void **)&adev->mes.ucode_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.ucode_fw_ptr, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.ucode_fw_obj);
	amdgpu_bo_unreserve(adev->mes.ucode_fw_obj);

	return 0;
}
371
/*
 * Copy the MES ucode *data* section into a GTT buffer object for the
 * backdoor load path (counterpart of mes_v10_1_allocate_ucode_buffer()).
 * On success adev->mes.data_fw_{obj,gpu_addr,ptr} are populated.
 */
static int mes_v10_1_allocate_ucode_data_buffer(struct amdgpu_device *adev)
{
	int r;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw->data;

	/* section offset/size come from the validated firmware header */
	fw_data = (const __le32 *)(adev->mes.fw->data +
		   le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

	/* 64KB alignment here vs PAGE_SIZE for the instruction BO */
	r = amdgpu_bo_create_reserved(adev, fw_size,
				      64 * 1024, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->mes.data_fw_obj,
				      &adev->mes.data_fw_gpu_addr,
				      (void **)&adev->mes.data_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.data_fw_ptr, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.data_fw_obj);
	amdgpu_bo_unreserve(adev->mes.data_fw_obj);

	return 0;
}
403
/* Free both backdoor-load buffer objects (data and instruction ucode). */
static void mes_v10_1_free_ucode_buffers(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->mes.data_fw_obj,
			      &adev->mes.data_fw_gpu_addr,
			      (void **)&adev->mes.data_fw_ptr);

	amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj,
			      &adev->mes.ucode_fw_gpu_addr,
			      (void **)&adev->mes.ucode_fw_ptr);
}
414
/*
 * Start or stop the MES pipe0 micro engine.
 *
 * enable=true: assert pipe0 reset, program the ucode entry point, clear the
 * DC BYPASS_UNCACHED bit, then write CP_MES_CNTL built from zero with only
 * PIPE0_ACTIVE set (which therefore also releases reset/halt).
 * enable=false: deactivate pipe0 and assert halt + reset while requesting
 * an instruction-cache invalidate.
 */
static void mes_v10_1_enable(struct amdgpu_device *adev, bool enable)
{
	uint32_t data = 0;

	if (enable) {
		data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);

		/* set ucode start address (byte address >> 2 = dword) */
		WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
			     (uint32_t)(adev->mes.uc_start_addr) >> 2);

		/* clear BYPASS_UNCACHED to avoid hangs after interrupt. */
		data = RREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_DC_OP_CNTL,
				     BYPASS_UNCACHED, 0);
		WREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL, data);

		/* unhalt MES and activate pipe0 */
		data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
		WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
	} else {
		data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL,
				     MES_INVALIDATE_ICACHE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
		WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
	}
}
447
448 /* This function is for backdoor MES firmware */
449 static int mes_v10_1_load_microcode(struct amdgpu_device *adev)
450 {
451         int r;
452         uint32_t data;
453
454         if (!adev->mes.fw)
455                 return -EINVAL;
456
457         r = mes_v10_1_allocate_ucode_buffer(adev);
458         if (r)
459                 return r;
460
461         r = mes_v10_1_allocate_ucode_data_buffer(adev);
462         if (r) {
463                 mes_v10_1_free_ucode_buffers(adev);
464                 return r;
465         }
466
467         mes_v10_1_enable(adev, false);
468
469         WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_CNTL, 0);
470
471         mutex_lock(&adev->srbm_mutex);
472         /* me=3, pipe=0, queue=0 */
473         nv_grbm_select(adev, 3, 0, 0, 0);
474
475         /* set ucode start address */
476         WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
477                      (uint32_t)(adev->mes.uc_start_addr) >> 2);
478
479         /* set ucode fimrware address */
480         WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_LO,
481                      lower_32_bits(adev->mes.ucode_fw_gpu_addr));
482         WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_HI,
483                      upper_32_bits(adev->mes.ucode_fw_gpu_addr));
484
485         /* set ucode instruction cache boundary to 2M-1 */
486         WREG32_SOC15(GC, 0, mmCP_MES_MIBOUND_LO, 0x1FFFFF);
487
488         /* set ucode data firmware address */
489         WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_LO,
490                      lower_32_bits(adev->mes.data_fw_gpu_addr));
491         WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_HI,
492                      upper_32_bits(adev->mes.data_fw_gpu_addr));
493
494         /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */
495         WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF);
496
497         /* invalidate ICACHE */
498         switch (adev->asic_type) {
499         case CHIP_SIENNA_CICHLID:
500                 data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
501                 break;
502         default:
503                 data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
504                 break;
505         }
506         data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
507         data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
508         switch (adev->asic_type) {
509         case CHIP_SIENNA_CICHLID:
510                 WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
511                 break;
512         default:
513                 WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
514                 break;
515         }
516
517         /* prime the ICACHE. */
518         switch (adev->asic_type) {
519         case CHIP_SIENNA_CICHLID:
520                 data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
521                 break;
522         default:
523                 data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
524                 break;
525         }
526         data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
527         switch (adev->asic_type) {
528         case CHIP_SIENNA_CICHLID:
529                 WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
530                 break;
531         default:
532                 WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
533                 break;
534         }
535
536         nv_grbm_select(adev, 0, 0, 0, 0);
537         mutex_unlock(&adev->srbm_mutex);
538
539         return 0;
540 }
541
/*
 * Allocate and zero the MES end-of-pipe (EOP) buffer in GTT.  The GPU
 * address is recorded in adev->mes.eop_gpu_addr; the temporary CPU mapping
 * is dropped before returning.
 */
static int mes_v10_1_allocate_eop_buf(struct amdgpu_device *adev)
{
	int r;
	u32 *eop;

	r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->mes.eop_gpu_obj,
				      &adev->mes.eop_gpu_addr,
				      (void **)&eop);
	if (r) {
		dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
		return r;
	}

	/* clear the whole BO (its size may exceed MES_EOP_SIZE after align) */
	memset(eop, 0, adev->mes.eop_gpu_obj->tbo.base.size);

	amdgpu_bo_kunmap(adev->mes.eop_gpu_obj);
	amdgpu_bo_unreserve(adev->mes.eop_gpu_obj);

	return 0;
}
564
565 static int mes_v10_1_allocate_mem_slots(struct amdgpu_device *adev)
566 {
567         int r;
568
569         r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
570         if (r) {
571                 dev_err(adev->dev,
572                         "(%d) mes sch_ctx_offs wb alloc failed\n", r);
573                 return r;
574         }
575         adev->mes.sch_ctx_gpu_addr =
576                 adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
577         adev->mes.sch_ctx_ptr =
578                 (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];
579
580         r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
581         if (r) {
582                 dev_err(adev->dev,
583                         "(%d) query_status_fence_offs wb alloc failed\n", r);
584                 return r;
585         }
586         adev->mes.query_status_fence_gpu_addr =
587                 adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
588         adev->mes.query_status_fence_ptr =
589                 (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];
590
591         return 0;
592 }
593
/*
 * Fill in the memory queue descriptor (MQD) for the MES ring: EOP buffer,
 * MQD/ring base addresses, doorbell control, writeback addresses, and the
 * HQD control fields.  The MQD is later committed to hardware by
 * mes_v10_1_queue_init_register().  Always returns 0.
 */
static int mes_v10_1_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v10_compute_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	/* EOP base is stored as a 256-byte-aligned address (>> 8) */
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(MES_EOP_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	/* NOTE(review): doorbell control is computed twice in this function;
	 * the second block near the end overwrites this value in the MQD.
	 */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	}
	else
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	ring->wptr = 0;
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MIN_IB_AVAIL_SIZE */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	mqd->cp_hqd_ib_control = tmp;

	/* activate the queue */
	mqd->cp_hqd_active = 1;
	return 0;
}
723
724 static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring)
725 {
726         struct v10_compute_mqd *mqd = ring->mqd_ptr;
727         struct amdgpu_device *adev = ring->adev;
728         uint32_t data = 0;
729
730         mutex_lock(&adev->srbm_mutex);
731         nv_grbm_select(adev, 3, 0, 0, 0);
732
733         /* set CP_HQD_VMID.VMID = 0. */
734         data = RREG32_SOC15(GC, 0, mmCP_HQD_VMID);
735         data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
736         WREG32_SOC15(GC, 0, mmCP_HQD_VMID, data);
737
738         /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
739         data = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
740         data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
741                              DOORBELL_EN, 0);
742         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);
743
744         /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
745         WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
746         WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
747
748         /* set CP_MQD_CONTROL.VMID=0 */
749         data = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
750         data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
751         WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, 0);
752
753         /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
754         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
755         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
756
757         /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
758         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
759                      mqd->cp_hqd_pq_rptr_report_addr_lo);
760         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
761                      mqd->cp_hqd_pq_rptr_report_addr_hi);
762
763         /* set CP_HQD_PQ_CONTROL */
764         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
765
766         /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
767         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
768                      mqd->cp_hqd_pq_wptr_poll_addr_lo);
769         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
770                      mqd->cp_hqd_pq_wptr_poll_addr_hi);
771
772         /* set CP_HQD_PQ_DOORBELL_CONTROL */
773         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
774                      mqd->cp_hqd_pq_doorbell_control);
775
776         /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
777         WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
778
779         /* set CP_HQD_ACTIVE.ACTIVE=1 */
780         WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
781
782         nv_grbm_select(adev, 0, 0, 0, 0);
783         mutex_unlock(&adev->srbm_mutex);
784 }
785
#if 0
/**
 * mes_v10_1_kiq_enable_queue - map the MES ring through the KIQ
 * @adev: amdgpu device
 *
 * Alternative (currently compiled out) queue-enable path that asks the
 * KIQ packet manager to map the MES ring, instead of programming the HQD
 * registers directly via MMIO as mes_v10_1_queue_init_register() does.
 *
 * Return: 0 on success, negative error code on failure.
 */
static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	int r;

	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
		return -EINVAL;

	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}

	kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);

	r = amdgpu_ring_test_ring(kiq_ring);
	if (r) {
		/* message previously read "kfq enable failed" (typo) */
		DRM_ERROR("kiq enable failed\n");
		kiq_ring->sched.ready = false;
	}
	return r;
}
#endif
812
813 static int mes_v10_1_queue_init(struct amdgpu_device *adev)
814 {
815         int r;
816
817         r = mes_v10_1_mqd_init(&adev->mes.ring);
818         if (r)
819                 return r;
820
821 #if 0
822         r = mes_v10_1_kiq_enable_queue(adev);
823         if (r)
824                 return r;
825 #else
826         mes_v10_1_queue_init_register(&adev->mes.ring);
827 #endif
828
829         return 0;
830 }
831
832 static int mes_v10_1_ring_init(struct amdgpu_device *adev)
833 {
834         struct amdgpu_ring *ring;
835
836         ring = &adev->mes.ring;
837
838         ring->funcs = &mes_v10_1_ring_funcs;
839
840         ring->me = 3;
841         ring->pipe = 0;
842         ring->queue = 0;
843
844         ring->ring_obj = NULL;
845         ring->use_doorbell = true;
846         ring->doorbell_index = adev->doorbell_index.mes_ring << 1;
847         ring->eop_gpu_addr = adev->mes.eop_gpu_addr;
848         ring->no_scheduler = true;
849         sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
850
851         return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
852                                 AMDGPU_RING_PRIO_DEFAULT, NULL);
853 }
854
855 static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev)
856 {
857         int r, mqd_size = sizeof(struct v10_compute_mqd);
858         struct amdgpu_ring *ring = &adev->mes.ring;
859
860         if (ring->mqd_obj)
861                 return 0;
862
863         r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
864                                     AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
865                                     &ring->mqd_gpu_addr, &ring->mqd_ptr);
866         if (r) {
867                 dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
868                 return r;
869         }
870
871         /* prepare MQD backup */
872         adev->mes.mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
873         if (!adev->mes.mqd_backup)
874                 dev_warn(adev->dev,
875                          "no memory to create MQD backup for ring %s\n",
876                          ring->name);
877
878         return 0;
879 }
880
881 static int mes_v10_1_sw_init(void *handle)
882 {
883         int r;
884         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
885
886         adev->mes.adev = adev;
887         adev->mes.funcs = &mes_v10_1_funcs;
888
889         r = mes_v10_1_init_microcode(adev);
890         if (r)
891                 return r;
892
893         r = mes_v10_1_allocate_eop_buf(adev);
894         if (r)
895                 return r;
896
897         r = mes_v10_1_mqd_sw_init(adev);
898         if (r)
899                 return r;
900
901         r = mes_v10_1_ring_init(adev);
902         if (r)
903                 return r;
904
905         r = mes_v10_1_allocate_mem_slots(adev);
906         if (r)
907                 return r;
908
909         return 0;
910 }
911
/**
 * mes_v10_1_sw_fini - tear down software state created by mes_v10_1_sw_init
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Releases the writeback slots, the MQD backup buffer, the MQD BO and the
 * EOP BO, then drops the MES microcode.  The teardown order mirrors the
 * reverse of the setup sequence and should not be reordered.
 *
 * Return: always 0.
 */
static int mes_v10_1_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/*
	 * Writeback slots — presumably handed out by
	 * mes_v10_1_allocate_mem_slots(); the allocation side is not
	 * visible in this file section.
	 */
	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);

	/* kfree(NULL) is a no-op, so a failed backup allocation is fine. */
	kfree(adev->mes.mqd_backup);

	amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
			      &adev->mes.ring.mqd_gpu_addr,
			      &adev->mes.ring.mqd_ptr);

	amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj,
			      &adev->mes.eop_gpu_addr,
			      NULL);

	mes_v10_1_free_microcode(adev);

	return 0;
}
933
934 static int mes_v10_1_hw_init(void *handle)
935 {
936         int r;
937         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
938
939         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
940                 r = mes_v10_1_load_microcode(adev);
941                 if (r) {
942                         DRM_ERROR("failed to MES fw, r=%d\n", r);
943                         return r;
944                 }
945         }
946
947         mes_v10_1_enable(adev, true);
948
949         r = mes_v10_1_queue_init(adev);
950         if (r)
951                 goto failure;
952
953         r = mes_v10_1_set_hw_resources(&adev->mes);
954         if (r)
955                 goto failure;
956
957         r = mes_v10_1_query_sched_status(&adev->mes);
958         if (r) {
959                 DRM_ERROR("MES is busy\n");
960                 goto failure;
961         }
962
963         return 0;
964
965 failure:
966         mes_v10_1_hw_fini(adev);
967         return r;
968 }
969
970 static int mes_v10_1_hw_fini(void *handle)
971 {
972         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
973
974         mes_v10_1_enable(adev, false);
975
976         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
977                 mes_v10_1_free_ucode_buffers(adev);
978
979         return 0;
980 }
981
/* Suspend hook — currently an unimplemented stub for MES v10.1. */
static int mes_v10_1_suspend(void *handle)
{
	return 0;
}
986
/* Resume hook — currently an unimplemented stub for MES v10.1. */
static int mes_v10_1_resume(void *handle)
{
	return 0;
}
991
/*
 * IP-block callback table for MES v10.1.  Callbacks not listed here
 * (early/late init, idle/wait handlers, etc.) are left NULL.
 */
static const struct amd_ip_funcs mes_v10_1_ip_funcs = {
	.name = "mes_v10_1",
	.sw_init = mes_v10_1_sw_init,
	.sw_fini = mes_v10_1_sw_fini,
	.hw_init = mes_v10_1_hw_init,
	.hw_fini = mes_v10_1_hw_fini,
	.suspend = mes_v10_1_suspend,
	.resume = mes_v10_1_resume,
};
1001
/* Exported IP-block descriptor registering MES v10.1 with the driver core. */
const struct amdgpu_ip_block_version mes_v10_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_MES,
	.major = 10,
	.minor = 1,
	.rev = 0,
	.funcs = &mes_v10_1_ip_funcs,
};