drm/amdgpu/gfx9: fix ngg enablement to clear gds reserved memory (v2)
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (linux-2.6-microblaze.git)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "soc15.h"
29 #include "soc15d.h"
30
31 #include "gc/gc_9_0_offset.h"
32 #include "gc/gc_9_0_sh_mask.h"
33 #include "vega10_enum.h"
34 #include "hdp/hdp_4_0_offset.h"
35
36 #include "soc15_common.h"
37 #include "clearstate_gfx9.h"
38 #include "v9_structs.h"
39
40 #define GFX9_NUM_GFX_RINGS     1
41 #define GFX9_MEC_HPD_SIZE 2048
42 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
43 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
44 #define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34
45
46 #define mmPWR_MISC_CNTL_STATUS                                  0x0183
47 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
48 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
49 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
50 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
51 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L
52
53 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
54 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
55 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
56 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
57 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
58 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
59
60 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
61 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
62 MODULE_FIRMWARE("amdgpu/raven_me.bin");
63 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
64 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
65 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
66
67 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
68 {
69         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
70         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
71         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
72         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
73         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
74         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
75         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
76         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
77         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
78         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
79         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
80         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
81         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
82         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
83         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
84         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
85         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
86         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
87         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
88         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
89         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
90         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
91         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
92 };
93
94 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
95 {
96         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
97         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
98         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
99         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
100         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
101         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
102         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800)
103 };
104
105 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
106 {
107         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
108         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
109         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
110         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
111         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
112         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
113         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
114         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
115         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
116         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
117         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
118         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
119         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
120         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
121         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
122         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
123         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
124         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
125         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
126         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
127         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
128 };
129
130 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
131 {
132         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
133         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
134         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
135         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
136         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
137         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
138         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
139 };
140
141 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
142 {
143         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
144         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
145 };
146
147 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
148 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
149
150 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
151 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
152 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
153 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
154 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
155                                  struct amdgpu_cu_info *cu_info);
156 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
157 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
158 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
159
160 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
161 {
162         switch (adev->asic_type) {
163         case CHIP_VEGA10:
164                 soc15_program_register_sequence(adev,
165                                                  golden_settings_gc_9_0,
166                                                  ARRAY_SIZE(golden_settings_gc_9_0));
167                 soc15_program_register_sequence(adev,
168                                                  golden_settings_gc_9_0_vg10,
169                                                  ARRAY_SIZE(golden_settings_gc_9_0_vg10));
170                 break;
171         case CHIP_RAVEN:
172                 soc15_program_register_sequence(adev,
173                                                  golden_settings_gc_9_1,
174                                                  ARRAY_SIZE(golden_settings_gc_9_1));
175                 soc15_program_register_sequence(adev,
176                                                  golden_settings_gc_9_1_rv1,
177                                                  ARRAY_SIZE(golden_settings_gc_9_1_rv1));
178                 break;
179         default:
180                 break;
181         }
182
183         soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
184                                         (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
185 }
186
187 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
188 {
189         adev->gfx.scratch.num_reg = 8;
190         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
191         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
192 }
193
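/* Helper: emit a WRITE_DATA packet that writes 'val' to register 'reg' on the
 * selected engine, optionally requesting write confirmation.
 */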
194 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
195                                        bool wc, uint32_t reg, uint32_t val)
196 {
197         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
198         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
199                                 WRITE_DATA_DST_SEL(0) |
200                                 (wc ? WR_CONFIRM : 0));
201         amdgpu_ring_write(ring, reg);
202         amdgpu_ring_write(ring, 0);
203         amdgpu_ring_write(ring, val);
204 }
205
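/* Helper: emit a WAIT_REG_MEM packet that polls a register (mem_space = 0) or
 * a memory location (mem_space = 1) until (value & mask) == ref, re-polling
 * every 'inv' interval.
 */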
206 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
207                                   int mem_space, int opt, uint32_t addr0,
208                                   uint32_t addr1, uint32_t ref, uint32_t mask,
209                                   uint32_t inv)
210 {
211         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
212         amdgpu_ring_write(ring,
213                                  /* memory (1) or register (0) */
214                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
215                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
216                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
217                                  WAIT_REG_MEM_ENGINE(eng_sel)));
218
219         if (mem_space)
220                 BUG_ON(addr0 & 0x3); /* Dword align */
221         amdgpu_ring_write(ring, addr0);
222         amdgpu_ring_write(ring, addr1);
223         amdgpu_ring_write(ring, ref);
224         amdgpu_ring_write(ring, mask);
225         amdgpu_ring_write(ring, inv); /* poll interval */
226 }
227
228 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
229 {
230         struct amdgpu_device *adev = ring->adev;
231         uint32_t scratch;
232         uint32_t tmp = 0;
233         unsigned i;
234         int r;
235
236         r = amdgpu_gfx_scratch_get(adev, &scratch);
237         if (r) {
238                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
239                 return r;
240         }
241         WREG32(scratch, 0xCAFEDEAD);
242         r = amdgpu_ring_alloc(ring, 3);
243         if (r) {
244                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
245                           ring->idx, r);
246                 amdgpu_gfx_scratch_free(adev, scratch);
247                 return r;
248         }
249         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
250         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
251         amdgpu_ring_write(ring, 0xDEADBEEF);
252         amdgpu_ring_commit(ring);
253
254         for (i = 0; i < adev->usec_timeout; i++) {
255                 tmp = RREG32(scratch);
256                 if (tmp == 0xDEADBEEF)
257                         break;
258                 DRM_UDELAY(1);
259         }
260         if (i < adev->usec_timeout) {
261                 DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
262                          ring->idx, i);
263         } else {
264                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
265                           ring->idx, scratch, tmp);
266                 r = -EINVAL;
267         }
268         amdgpu_gfx_scratch_free(adev, scratch);
269         return r;
270 }
271
272 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
273 {
274         struct amdgpu_device *adev = ring->adev;
275         struct amdgpu_ib ib;
276         struct dma_fence *f = NULL;
277         uint32_t scratch;
278         uint32_t tmp = 0;
279         long r;
280
281         r = amdgpu_gfx_scratch_get(adev, &scratch);
282         if (r) {
283                 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
284                 return r;
285         }
286         WREG32(scratch, 0xCAFEDEAD);
287         memset(&ib, 0, sizeof(ib));
288         r = amdgpu_ib_get(adev, NULL, 256, &ib);
289         if (r) {
290                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
291                 goto err1;
292         }
293         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
294         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
295         ib.ptr[2] = 0xDEADBEEF;
296         ib.length_dw = 3;
297
298         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
299         if (r)
300                 goto err2;
301
302         r = dma_fence_wait_timeout(f, false, timeout);
303         if (r == 0) {
304                 DRM_ERROR("amdgpu: IB test timed out.\n");
305                 r = -ETIMEDOUT;
306                 goto err2;
307         } else if (r < 0) {
308                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
309                 goto err2;
310         }
311         tmp = RREG32(scratch);
312         if (tmp == 0xDEADBEEF) {
313                 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
314                 r = 0;
315         } else {
316                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
317                           scratch, tmp);
318                 r = -EINVAL;
319         }
320 err2:
321         amdgpu_ib_free(adev, &ib, NULL);
322         dma_fence_put(f);
323 err1:
324         amdgpu_gfx_scratch_free(adev, scratch);
325         return r;
326 }
327
328
329 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
330 {
331         release_firmware(adev->gfx.pfp_fw);
332         adev->gfx.pfp_fw = NULL;
333         release_firmware(adev->gfx.me_fw);
334         adev->gfx.me_fw = NULL;
335         release_firmware(adev->gfx.ce_fw);
336         adev->gfx.ce_fw = NULL;
337         release_firmware(adev->gfx.rlc_fw);
338         adev->gfx.rlc_fw = NULL;
339         release_firmware(adev->gfx.mec_fw);
340         adev->gfx.mec_fw = NULL;
341         release_firmware(adev->gfx.mec2_fw);
342         adev->gfx.mec2_fw = NULL;
343
344         kfree(adev->gfx.rlc.register_list_format);
345 }
346
347 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
348 {
349         const char *chip_name;
350         char fw_name[30];
351         int err;
352         struct amdgpu_firmware_info *info = NULL;
353         const struct common_firmware_header *header = NULL;
354         const struct gfx_firmware_header_v1_0 *cp_hdr;
355         const struct rlc_firmware_header_v2_0 *rlc_hdr;
356         unsigned int *tmp = NULL;
357         unsigned int i = 0;
358
359         DRM_DEBUG("\n");
360
361         switch (adev->asic_type) {
362         case CHIP_VEGA10:
363                 chip_name = "vega10";
364                 break;
365         case CHIP_RAVEN:
366                 chip_name = "raven";
367                 break;
368         default:
369                 BUG();
370         }
371
372         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
373         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
374         if (err)
375                 goto out;
376         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
377         if (err)
378                 goto out;
379         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
380         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
381         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
382
383         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
384         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
385         if (err)
386                 goto out;
387         err = amdgpu_ucode_validate(adev->gfx.me_fw);
388         if (err)
389                 goto out;
390         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
391         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
392         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
393
394         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
395         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
396         if (err)
397                 goto out;
398         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
399         if (err)
400                 goto out;
401         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
402         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
403         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
404
405         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
406         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
407         if (err)
408                 goto out;
409         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
            if (err)
                    goto out;
410         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
411         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
412         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
413         adev->gfx.rlc.save_and_restore_offset =
414                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
415         adev->gfx.rlc.clear_state_descriptor_offset =
416                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
417         adev->gfx.rlc.avail_scratch_ram_locations =
418                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
419         adev->gfx.rlc.reg_restore_list_size =
420                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
421         adev->gfx.rlc.reg_list_format_start =
422                         le32_to_cpu(rlc_hdr->reg_list_format_start);
423         adev->gfx.rlc.reg_list_format_separate_start =
424                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
425         adev->gfx.rlc.starting_offsets_start =
426                         le32_to_cpu(rlc_hdr->starting_offsets_start);
427         adev->gfx.rlc.reg_list_format_size_bytes =
428                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
429         adev->gfx.rlc.reg_list_size_bytes =
430                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
431         adev->gfx.rlc.register_list_format =
432                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
433                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
434         if (!adev->gfx.rlc.register_list_format) {
435                 err = -ENOMEM;
436                 goto out;
437         }
438
439         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
440                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
441         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
442                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
443
444         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
445
446         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
447                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
448         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
449                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
450
451         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
452         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
453         if (err)
454                 goto out;
455         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
456         if (err)
457                 goto out;
458         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
459         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
460         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
461
462
463         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
464         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
465         if (!err) {
466                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
467                 if (err)
468                         goto out;
469                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
470                         adev->gfx.mec2_fw->data;
471                 adev->gfx.mec2_fw_version =
472                         le32_to_cpu(cp_hdr->header.ucode_version);
473                 adev->gfx.mec2_feature_version =
474                         le32_to_cpu(cp_hdr->ucode_feature_version);
475         } else {
476                 err = 0;
477                 adev->gfx.mec2_fw = NULL;
478         }
479
480         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
481                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
482                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
483                 info->fw = adev->gfx.pfp_fw;
484                 header = (const struct common_firmware_header *)info->fw->data;
485                 adev->firmware.fw_size +=
486                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
487
488                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
489                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
490                 info->fw = adev->gfx.me_fw;
491                 header = (const struct common_firmware_header *)info->fw->data;
492                 adev->firmware.fw_size +=
493                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
494
495                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
496                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
497                 info->fw = adev->gfx.ce_fw;
498                 header = (const struct common_firmware_header *)info->fw->data;
499                 adev->firmware.fw_size +=
500                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
501
502                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
503                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
504                 info->fw = adev->gfx.rlc_fw;
505                 header = (const struct common_firmware_header *)info->fw->data;
506                 adev->firmware.fw_size +=
507                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
508
509                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
510                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
511                 info->fw = adev->gfx.mec_fw;
512                 header = (const struct common_firmware_header *)info->fw->data;
513                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
514                 adev->firmware.fw_size +=
515                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
516
517                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
518                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
519                 info->fw = adev->gfx.mec_fw;
520                 adev->firmware.fw_size +=
521                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
522
523                 if (adev->gfx.mec2_fw) {
524                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
525                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
526                         info->fw = adev->gfx.mec2_fw;
527                         header = (const struct common_firmware_header *)info->fw->data;
528                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
529                         adev->firmware.fw_size +=
530                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
531                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
532                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
533                         info->fw = adev->gfx.mec2_fw;
534                         adev->firmware.fw_size +=
535                                 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
536                 }
537
538         }
539
540 out:
541         if (err) {
542                 dev_err(adev->dev,
543                         "gfx9: Failed to load firmware \"%s\"\n",
544                         fw_name);
545                 release_firmware(adev->gfx.pfp_fw);
546                 adev->gfx.pfp_fw = NULL;
547                 release_firmware(adev->gfx.me_fw);
548                 adev->gfx.me_fw = NULL;
549                 release_firmware(adev->gfx.ce_fw);
550                 adev->gfx.ce_fw = NULL;
551                 release_firmware(adev->gfx.rlc_fw);
552                 adev->gfx.rlc_fw = NULL;
553                 release_firmware(adev->gfx.mec_fw);
554                 adev->gfx.mec_fw = NULL;
555                 release_firmware(adev->gfx.mec2_fw);
556                 adev->gfx.mec2_fw = NULL;
557         }
558         return err;
559 }
560
561 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
562 {
563         u32 count = 0;
564         const struct cs_section_def *sect = NULL;
565         const struct cs_extent_def *ext = NULL;
566
567         /* begin clear state */
568         count += 2;
569         /* context control state */
570         count += 3;
571
572         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
573                 for (ext = sect->section; ext->extent != NULL; ++ext) {
574                         if (sect->id == SECT_CONTEXT)
575                                 count += 2 + ext->reg_count;
576                         else
577                                 return 0;
578                 }
579         }
580
581         /* end clear state */
582         count += 2;
583         /* clear state */
584         count += 2;
585
586         return count;
587 }
588
589 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
590                                     volatile u32 *buffer)
591 {
592         u32 count = 0, i;
593         const struct cs_section_def *sect = NULL;
594         const struct cs_extent_def *ext = NULL;
595
596         if (adev->gfx.rlc.cs_data == NULL)
597                 return;
598         if (buffer == NULL)
599                 return;
600
601         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
602         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
603
604         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
605         buffer[count++] = cpu_to_le32(0x80000000);
606         buffer[count++] = cpu_to_le32(0x80000000);
607
608         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
609                 for (ext = sect->section; ext->extent != NULL; ++ext) {
610                         if (sect->id == SECT_CONTEXT) {
611                                 buffer[count++] =
612                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
613                                 buffer[count++] = cpu_to_le32(ext->reg_index -
614                                                 PACKET3_SET_CONTEXT_REG_START);
615                                 for (i = 0; i < ext->reg_count; i++)
616                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
617                         } else {
618                                 return;
619                         }
620                 }
621         }
622
623         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
624         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
625
626         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
627         buffer[count++] = cpu_to_le32(0);
628 }
629
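/* Program the RLC load-balancing (LBPW) thresholds, counters, sampling
 * parameters and CU masks, broadcast to all shader engines.
 */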
630 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
631 {
632         uint32_t data;
633
634         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
635         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
636         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
637         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
638         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
639
640         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
641         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
642
643         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
644         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
645
646         mutex_lock(&adev->grbm_idx_mutex);
647         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
648         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
649         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
650
651         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
652         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
653         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
654         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
655         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
656
657         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
658         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
659         data &= 0x0000FFFF;
660         data |= 0x00C00000;
661         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
662
663         /* set RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF */
664         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, 0xFFF);
665
666         /* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
667          * but used for RLC_LB_CNTL configuration */
668         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
669         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
670         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
671         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
672         mutex_unlock(&adev->grbm_idx_mutex);
673 }
674
675 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
676 {
677         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
678 }
679
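/* Copy the CP jump tables from the CE/PFP/ME/MEC/MEC2 firmware images into
 * the RLC cp_table buffer (used on Raven).
 */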
680 static void rv_init_cp_jump_table(struct amdgpu_device *adev)
681 {
682         const __le32 *fw_data;
683         volatile u32 *dst_ptr;
684         int me, i, max_me = 5;
685         u32 bo_offset = 0;
686         u32 table_offset, table_size;
687
688         /* write the cp table buffer */
689         dst_ptr = adev->gfx.rlc.cp_table_ptr;
690         for (me = 0; me < max_me; me++) {
691                 if (me == 0) {
692                         const struct gfx_firmware_header_v1_0 *hdr =
693                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
694                         fw_data = (const __le32 *)
695                                 (adev->gfx.ce_fw->data +
696                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
697                         table_offset = le32_to_cpu(hdr->jt_offset);
698                         table_size = le32_to_cpu(hdr->jt_size);
699                 } else if (me == 1) {
700                         const struct gfx_firmware_header_v1_0 *hdr =
701                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
702                         fw_data = (const __le32 *)
703                                 (adev->gfx.pfp_fw->data +
704                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
705                         table_offset = le32_to_cpu(hdr->jt_offset);
706                         table_size = le32_to_cpu(hdr->jt_size);
707                 } else if (me == 2) {
708                         const struct gfx_firmware_header_v1_0 *hdr =
709                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
710                         fw_data = (const __le32 *)
711                                 (adev->gfx.me_fw->data +
712                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
713                         table_offset = le32_to_cpu(hdr->jt_offset);
714                         table_size = le32_to_cpu(hdr->jt_size);
715                 } else if (me == 3) {
716                         const struct gfx_firmware_header_v1_0 *hdr =
717                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
718                         fw_data = (const __le32 *)
719                                 (adev->gfx.mec_fw->data +
720                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
721                         table_offset = le32_to_cpu(hdr->jt_offset);
722                         table_size = le32_to_cpu(hdr->jt_size);
723                 } else  if (me == 4) {
724                         const struct gfx_firmware_header_v1_0 *hdr =
725                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
726                         fw_data = (const __le32 *)
727                                 (adev->gfx.mec2_fw->data +
728                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
729                         table_offset = le32_to_cpu(hdr->jt_offset);
730                         table_size = le32_to_cpu(hdr->jt_size);
731                 }
732
733                 for (i = 0; i < table_size; i++) {
734                         dst_ptr[bo_offset + i] =
735                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
736                 }
737
738                 bo_offset += table_size;
739         }
740 }
741
742 static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev)
743 {
744         /* clear state block */
745         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
746                         &adev->gfx.rlc.clear_state_gpu_addr,
747                         (void **)&adev->gfx.rlc.cs_ptr);
748
749         /* jump table block */
750         amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
751                         &adev->gfx.rlc.cp_table_gpu_addr,
752                         (void **)&adev->gfx.rlc.cp_table_ptr);
753 }
754
755 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
756 {
757         volatile u32 *dst_ptr;
758         u32 dws;
759         const struct cs_section_def *cs_data;
760         int r;
761
762         adev->gfx.rlc.cs_data = gfx9_cs_data;
763
764         cs_data = adev->gfx.rlc.cs_data;
765
766         if (cs_data) {
767                 /* clear state block */
768                 adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev);
769                 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
770                                               AMDGPU_GEM_DOMAIN_VRAM,
771                                               &adev->gfx.rlc.clear_state_obj,
772                                               &adev->gfx.rlc.clear_state_gpu_addr,
773                                               (void **)&adev->gfx.rlc.cs_ptr);
774                 if (r) {
775                         dev_err(adev->dev, "(%d) failed to create rlc csb bo\n",
776                                 r);
777                         gfx_v9_0_rlc_fini(adev);
778                         return r;
779                 }
780                 /* set up the cs buffer */
781                 dst_ptr = adev->gfx.rlc.cs_ptr;
782                 gfx_v9_0_get_csb_buffer(adev, dst_ptr);
783                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
784                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
785         }
786
787         if (adev->asic_type == CHIP_RAVEN) {
788                 /* TODO: double check the cp_table_size for RV */
789                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
790                 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
791                                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
792                                               &adev->gfx.rlc.cp_table_obj,
793                                               &adev->gfx.rlc.cp_table_gpu_addr,
794                                               (void **)&adev->gfx.rlc.cp_table_ptr);
795                 if (r) {
796                         dev_err(adev->dev,
797                                 "(%d) failed to create cp table bo\n", r);
798                         gfx_v9_0_rlc_fini(adev);
799                         return r;
800                 }
801
802                 rv_init_cp_jump_table(adev);
803                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
804                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
805
806                 gfx_v9_0_init_lbpw(adev);
807         }
808
809         return 0;
810 }
811
812 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
813 {
814         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
815         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
816 }
817
818 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
819 {
820         int r;
821         u32 *hpd;
822         const __le32 *fw_data;
823         unsigned fw_size;
824         u32 *fw;
825         size_t mec_hpd_size;
826
827         const struct gfx_firmware_header_v1_0 *mec_hdr;
828
829         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
830
831         /* take ownership of the relevant compute queues */
832         amdgpu_gfx_compute_queue_acquire(adev);
833         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
834
835         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
836                                       AMDGPU_GEM_DOMAIN_GTT,
837                                       &adev->gfx.mec.hpd_eop_obj,
838                                       &adev->gfx.mec.hpd_eop_gpu_addr,
839                                       (void **)&hpd);
840         if (r) {
841                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
842                 gfx_v9_0_mec_fini(adev);
843                 return r;
844         }
845
846         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
847
848         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
849         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
850
851         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
852
853         fw_data = (const __le32 *)
854                 (adev->gfx.mec_fw->data +
855                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
856         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
857
858         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
859                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
860                                       &adev->gfx.mec.mec_fw_obj,
861                                       &adev->gfx.mec.mec_fw_gpu_addr,
862                                       (void **)&fw);
863         if (r) {
864                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
865                 gfx_v9_0_mec_fini(adev);
866                 return r;
867         }
868
869         memcpy(fw, fw_data, fw_size);
870
871         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
872         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
873
874         return 0;
875 }
876
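/* Read one per-wave SQ register through the SQ_IND_INDEX/SQ_IND_DATA
 * indirect register interface.
 */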
877 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
878 {
879         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
880                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
881                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
882                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
883                 (SQ_IND_INDEX__FORCE_READ_MASK));
884         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
885 }
886
887 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
888                            uint32_t wave, uint32_t thread,
889                            uint32_t regno, uint32_t num, uint32_t *out)
890 {
891         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
892                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
893                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
894                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
895                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
896                 (SQ_IND_INDEX__FORCE_READ_MASK) |
897                 (SQ_IND_INDEX__AUTO_INCR_MASK));
898         while (num--)
899                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
900 }
901
902 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
903 {
904         /* type 1 wave data */
905         dst[(*no_fields)++] = 1;
906         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
907         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
908         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
909         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
910         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
911         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
912         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
913         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
914         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
915         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
916         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
917         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
918         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
919         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
920 }
921
922 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
923                                      uint32_t wave, uint32_t start,
924                                      uint32_t size, uint32_t *dst)
925 {
926         wave_read_regs(
927                 adev, simd, wave, 0,
928                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
929 }
930
931 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
932                                      uint32_t wave, uint32_t thread,
933                                      uint32_t start, uint32_t size,
934                                      uint32_t *dst)
935 {
936         wave_read_regs(
937                 adev, simd, wave, thread,
938                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
939 }
940
941 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
942         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
943         .select_se_sh = &gfx_v9_0_select_se_sh,
944         .read_wave_data = &gfx_v9_0_read_wave_data,
945         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
946         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
947 };
948
949 static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
950 {
951         u32 gb_addr_config;
952
953         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
954
955         switch (adev->asic_type) {
956         case CHIP_VEGA10:
957                 adev->gfx.config.max_hw_contexts = 8;
958                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
959                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
960                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
961                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
962                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
963                 break;
964         case CHIP_RAVEN:
965                 adev->gfx.config.max_hw_contexts = 8;
966                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
967                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
968                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
969                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
970                 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
971                 break;
972         default:
973                 BUG();
974                 break;
975         }
976
977         adev->gfx.config.gb_addr_config = gb_addr_config;
978
979         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
980                         REG_GET_FIELD(
981                                         adev->gfx.config.gb_addr_config,
982                                         GB_ADDR_CONFIG,
983                                         NUM_PIPES);
984
985         adev->gfx.config.max_tile_pipes =
986                 adev->gfx.config.gb_addr_config_fields.num_pipes;
987
988         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
989                         REG_GET_FIELD(
990                                         adev->gfx.config.gb_addr_config,
991                                         GB_ADDR_CONFIG,
992                                         NUM_BANKS);
993         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
994                         REG_GET_FIELD(
995                                         adev->gfx.config.gb_addr_config,
996                                         GB_ADDR_CONFIG,
997                                         MAX_COMPRESSED_FRAGS);
998         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
999                         REG_GET_FIELD(
1000                                         adev->gfx.config.gb_addr_config,
1001                                         GB_ADDR_CONFIG,
1002                                         NUM_RB_PER_SE);
1003         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1004                         REG_GET_FIELD(
1005                                         adev->gfx.config.gb_addr_config,
1006                                         GB_ADDR_CONFIG,
1007                                         NUM_SHADER_ENGINES);
1008         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1009                         REG_GET_FIELD(
1010                                         adev->gfx.config.gb_addr_config,
1011                                         GB_ADDR_CONFIG,
1012                                         PIPE_INTERLEAVE_SIZE));
1013 }
1014
1015 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1016                                    struct amdgpu_ngg_buf *ngg_buf,
1017                                    int size_se,
1018                                    int default_size_se)
1019 {
1020         int r;
1021
1022         if (size_se < 0) {
1023                 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1024                 return -EINVAL;
1025         }
1026         size_se = size_se ? size_se : default_size_se;
1027
1028         ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1029         r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1030                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1031                                     &ngg_buf->bo,
1032                                     &ngg_buf->gpu_addr,
1033                                     NULL);
1034         if (r) {
1035                 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1036                 return r;
1037         }
1038         ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1039
1040         return r;
1041 }
1042
1043 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1044 {
1045         int i;
1046
1047         for (i = 0; i < NGG_BUF_MAX; i++)
1048                 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1049                                       &adev->gfx.ngg.buf[i].gpu_addr,
1050                                       NULL);
1051
1052         memset(&adev->gfx.ngg.buf[0], 0,
1053                         sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1054
1055         adev->gfx.ngg.init = false;
1056
1057         return 0;
1058 }
1059
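/* Reserve a small GDS window for NGG and allocate the per-SE primitive,
 * position, control-sideband and (optional) parameter cache buffers.
 */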
1060 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1061 {
1062         int r;
1063
1064         if (!amdgpu_ngg || adev->gfx.ngg.init == true)
1065                 return 0;
1066
1067         /* GDS reserved memory: 64-byte alignment */
1068         adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1069         adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
1070         adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
1071         adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1072         adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1073
1074         /* Primitive Buffer */
1075         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1076                                     amdgpu_prim_buf_per_se,
1077                                     64 * 1024);
1078         if (r) {
1079                 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1080                 goto err;
1081         }
1082
1083         /* Position Buffer */
1084         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1085                                     amdgpu_pos_buf_per_se,
1086                                     256 * 1024);
1087         if (r) {
1088                 dev_err(adev->dev, "Failed to create Position Buffer\n");
1089                 goto err;
1090         }
1091
1092         /* Control Sideband */
1093         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1094                                     amdgpu_cntl_sb_buf_per_se,
1095                                     256);
1096         if (r) {
1097                 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1098                 goto err;
1099         }
1100
1101         /* Parameter Cache, not created by default */
1102         if (amdgpu_param_buf_per_se <= 0)
1103                 goto out;
1104
1105         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1106                                     amdgpu_param_buf_per_se,
1107                                     512 * 1024);
1108         if (r) {
1109                 dev_err(adev->dev, "Failed to create Parameter Cache\n");
1110                 goto err;
1111         }
1112
1113 out:
1114         adev->gfx.ngg.init = true;
1115         return 0;
1116 err:
1117         gfx_v9_0_ngg_fini(adev);
1118         return r;
1119 }
1120
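/* NGG enablement: program the WD index/position/control-sideband buffer sizes
 * and base addresses, then clear the reserved GDS window with a CP DMA_DATA
 * packet on the gfx ring.
 */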
1121 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1122 {
1123         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1124         int r;
1125         u32 data, base;
1126
1127         if (!amdgpu_ngg)
1128                 return 0;
1129
1130         /* Program buffer size */
1131         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1132                              adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1133         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1134                              adev->gfx.ngg.buf[NGG_POS].size >> 8);
1135         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1136
1137         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1138                              adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1139         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1140                              adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1141         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1142
1143         /* Program buffer base address */
1144         base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1145         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1146         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1147
1148         base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1149         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1150         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1151
1152         base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1153         data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1154         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1155
1156         base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1157         data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1158         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1159
1160         base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1161         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1162         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1163
1164         base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1165         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1166         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1167
1168         /* Clear GDS reserved memory */
1169         r = amdgpu_ring_alloc(ring, 17);
1170         if (r) {
1171                 DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n",
1172                           ring->idx, r);
1173                 return r;
1174         }
1175
1176         gfx_v9_0_write_data_to_reg(ring, 0, false,
1177                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1178                                    (adev->gds.mem.total_size +
1179                                     adev->gfx.ngg.gds_reserve_size) >>
1180                                    AMDGPU_GDS_SHIFT);
1181
1182         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1183         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1184                                 PACKET3_DMA_DATA_DST_SEL(1) |
1185                                 PACKET3_DMA_DATA_SRC_SEL(2)));
1186         amdgpu_ring_write(ring, 0);
1187         amdgpu_ring_write(ring, 0);
1188         amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1189         amdgpu_ring_write(ring, 0);
1190         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1191                                 adev->gfx.ngg.gds_reserve_size);
1192
1193         gfx_v9_0_write_data_to_reg(ring, 0, false,
1194                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1195
1196         amdgpu_ring_commit(ring);
1197
1198         return 0;
1199 }
1200
1201 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1202                                       int mec, int pipe, int queue)
1203 {
1204         int r;
1205         unsigned irq_type;
1206         struct amdgpu_ring *ring;
1207
1208         ring = &adev->gfx.compute_ring[ring_id];
1209
1210         /* mec0 is me1 */
1211         ring->me = mec + 1;
1212         ring->pipe = pipe;
1213         ring->queue = queue;
1214
1215         ring->ring_obj = NULL;
1216         ring->use_doorbell = true;
1217         ring->doorbell_index = (AMDGPU_DOORBELL_MEC_RING0 + ring_id) << 1;
1218         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1219                                 + (ring_id * GFX9_MEC_HPD_SIZE);
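             /*
              * Each compute ring gets its own GFX9_MEC_HPD_SIZE slice of the
              * MEC EOP buffer.  The doorbell index is apparently kept in
              * 32-bit dword units, hence the << 1 applied to the doorbell
              * slot number above.
              */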
1220         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1221
1222         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1223                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1224                 + ring->pipe;
1225
1226         /* type-2 packets are deprecated on MEC, use type-3 instead */
1227         r = amdgpu_ring_init(adev, ring, 1024,
1228                              &adev->gfx.eop_irq, irq_type);
1229         if (r)
1230                 return r;
1231
1232
1233         return 0;
1234 }
1235
1236 static int gfx_v9_0_sw_init(void *handle)
1237 {
1238         int i, j, k, r, ring_id;
1239         struct amdgpu_ring *ring;
1240         struct amdgpu_kiq *kiq;
1241         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1242
1243         switch (adev->asic_type) {
1244         case CHIP_VEGA10:
1245         case CHIP_RAVEN:
1246                 adev->gfx.mec.num_mec = 2;
1247                 break;
1248         default:
1249                 adev->gfx.mec.num_mec = 1;
1250                 break;
1251         }
1252
1253         adev->gfx.mec.num_pipe_per_mec = 4;
1254         adev->gfx.mec.num_queue_per_pipe = 8;
1255
1256         /* KIQ event */
1257         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
1258         if (r)
1259                 return r;
1260
1261         /* EOP Event */
1262         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
1263         if (r)
1264                 return r;
1265
1266         /* Privileged reg */
1267         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 184,
1268                               &adev->gfx.priv_reg_irq);
1269         if (r)
1270                 return r;
1271
1272         /* Privileged inst */
1273         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 185,
1274                               &adev->gfx.priv_inst_irq);
1275         if (r)
1276                 return r;
1277
1278         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1279
1280         gfx_v9_0_scratch_init(adev);
1281
1282         r = gfx_v9_0_init_microcode(adev);
1283         if (r) {
1284                 DRM_ERROR("Failed to load gfx firmware!\n");
1285                 return r;
1286         }
1287
1288         r = gfx_v9_0_rlc_init(adev);
1289         if (r) {
1290                 DRM_ERROR("Failed to init rlc BOs!\n");
1291                 return r;
1292         }
1293
1294         r = gfx_v9_0_mec_init(adev);
1295         if (r) {
1296                 DRM_ERROR("Failed to init MEC BOs!\n");
1297                 return r;
1298         }
1299
1300         /* set up the gfx ring */
1301         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1302                 ring = &adev->gfx.gfx_ring[i];
1303                 ring->ring_obj = NULL;
1304                 if (!i)
1305                         sprintf(ring->name, "gfx");
1306                 else
1307                         sprintf(ring->name, "gfx_%d", i);
1308                 ring->use_doorbell = true;
1309                 ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1;
1310                 r = amdgpu_ring_init(adev, ring, 1024,
1311                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
1312                 if (r)
1313                         return r;
1314         }
1315
1316         /* set up the compute queues - allocate horizontally across pipes */
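             /*
              * The pipe loop is innermost, so consecutive ring ids land on
              * different pipes instead of filling one pipe before moving on.
              */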
1317         ring_id = 0;
1318         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1319                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1320                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1321                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1322                                         continue;
1323
1324                                 r = gfx_v9_0_compute_ring_init(adev,
1325                                                                ring_id,
1326                                                                i, k, j);
1327                                 if (r)
1328                                         return r;
1329
1330                                 ring_id++;
1331                         }
1332                 }
1333         }
1334
1335         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1336         if (r) {
1337                 DRM_ERROR("Failed to init KIQ BOs!\n");
1338                 return r;
1339         }
1340
1341         kiq = &adev->gfx.kiq;
1342         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1343         if (r)
1344                 return r;
1345
1346         /* create MQD for all compute queues as well as the KIQ for the SRIOV case */
1347         r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1348         if (r)
1349                 return r;
1350
1351         /* reserve GDS, GWS and OA resource for gfx */
1352         r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
1353                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
1354                                     &adev->gds.gds_gfx_bo, NULL, NULL);
1355         if (r)
1356                 return r;
1357
1358         r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
1359                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
1360                                     &adev->gds.gws_gfx_bo, NULL, NULL);
1361         if (r)
1362                 return r;
1363
1364         r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
1365                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
1366                                     &adev->gds.oa_gfx_bo, NULL, NULL);
1367         if (r)
1368                 return r;
1369
1370         adev->gfx.ce_ram_size = 0x8000;
1371
1372         gfx_v9_0_gpu_early_init(adev);
1373
1374         r = gfx_v9_0_ngg_init(adev);
1375         if (r)
1376                 return r;
1377
1378         return 0;
1379 }
1380
1381
1382 static int gfx_v9_0_sw_fini(void *handle)
1383 {
1384         int i;
1385         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1386
1387         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
1388         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
1389         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
1390
1391         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1392                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1393         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1394                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1395
1396         amdgpu_gfx_compute_mqd_sw_fini(adev);
1397         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1398         amdgpu_gfx_kiq_fini(adev);
1399
1400         gfx_v9_0_mec_fini(adev);
1401         gfx_v9_0_ngg_fini(adev);
1402         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
1403                                 &adev->gfx.rlc.clear_state_gpu_addr,
1404                                 (void **)&adev->gfx.rlc.cs_ptr);
1405         if (adev->asic_type == CHIP_RAVEN) {
1406                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1407                                 &adev->gfx.rlc.cp_table_gpu_addr,
1408                                 (void **)&adev->gfx.rlc.cp_table_ptr);
1409         }
1410         gfx_v9_0_free_microcode(adev);
1411
1412         return 0;
1413 }
1414
1415
1416 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1417 {
1418         /* TODO */
1419 }
1420
1421 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1422 {
1423         u32 data;
1424
1425         if (instance == 0xffffffff)
1426                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1427         else
1428                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1429
1430         if (se_num == 0xffffffff)
1431                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1432         else
1433                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1434
1435         if (sh_num == 0xffffffff)
1436                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1437         else
1438                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1439
1440         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
1441 }
1442
1443 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1444 {
1445         u32 data, mask;
1446
1447         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1448         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1449
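             /*
              * Both registers use the same BACKEND_DISABLE field layout, so
              * the mask from one and the shift from the other can be applied
              * to the OR of the two values.
              */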
1450         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1451         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1452
1453         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1454                                          adev->gfx.config.max_sh_per_se);
1455
1456         return (~data) & mask;
1457 }
1458
1459 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1460 {
1461         int i, j;
1462         u32 data;
1463         u32 active_rbs = 0;
1464         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1465                                         adev->gfx.config.max_sh_per_se;
1466
1467         mutex_lock(&adev->grbm_idx_mutex);
1468         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1469                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1470                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1471                         data = gfx_v9_0_get_rb_active_bitmap(adev);
1472                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1473                                                rb_bitmap_width_per_sh);
1474                 }
1475         }
1476         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1477         mutex_unlock(&adev->grbm_idx_mutex);
1478
1479         adev->gfx.config.backend_enable_mask = active_rbs;
1480         adev->gfx.config.num_rbs = hweight32(active_rbs);
1481 }
1482
1483 #define DEFAULT_SH_MEM_BASES    (0x6000)
1484 #define FIRST_COMPUTE_VMID      (8)
1485 #define LAST_COMPUTE_VMID       (16)
1486 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1487 {
1488         int i;
1489         uint32_t sh_mem_config;
1490         uint32_t sh_mem_bases;
1491
1492         /*
1493          * Configure apertures:
1494          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1495          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1496          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1497          */
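             /*
              * SH_MEM_BASES packs the private and shared aperture bases into
              * its two 16-bit halves; each base forms bits [63:48] of the
              * aperture address, so 0x6000 in both halves yields the
              * 0x6000'0000'0000'0000 based apertures described above.
              */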
1498         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1499
1500         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1501                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1502                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1503
1504         mutex_lock(&adev->srbm_mutex);
1505         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1506                 soc15_grbm_select(adev, 0, 0, 0, i);
1507                 /* CP and shaders */
1508                 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1509                 WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1510         }
1511         soc15_grbm_select(adev, 0, 0, 0, 0);
1512         mutex_unlock(&adev->srbm_mutex);
1513 }
1514
1515 static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
1516 {
1517         u32 tmp;
1518         int i;
1519
1520         WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1521
1522         gfx_v9_0_tiling_mode_table_init(adev);
1523
1524         gfx_v9_0_setup_rb(adev);
1525         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1526
1527         /* XXX SH_MEM regs */
1528         /* where to put LDS, scratch, GPUVM in FSA64 space */
1529         mutex_lock(&adev->srbm_mutex);
1530         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1531                 soc15_grbm_select(adev, 0, 0, 0, i);
1532                 /* CP and shaders */
1533                 if (i == 0) {
1534                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1535                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1536                         WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
1537                         WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
1538                 } else {
1539                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1540                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1541                         WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
1542                         tmp = adev->mc.shared_aperture_start >> 48;
1543                         WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
1544                 }
1545         }
1546         soc15_grbm_select(adev, 0, 0, 0, 0);
1547
1548         mutex_unlock(&adev->srbm_mutex);
1549
1550         gfx_v9_0_init_compute_vmid(adev);
1551
1552         mutex_lock(&adev->grbm_idx_mutex);
1553         /*
1554          * making sure that the following register writes will be broadcasted
1555          * making sure that the following register writes will be broadcast
1556          */
1557         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1558
1559         WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
1560                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
1561                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1562                    (adev->gfx.config.sc_prim_fifo_size_backend <<
1563                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
1564                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
1565                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
1566                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
1567                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
1568         mutex_unlock(&adev->grbm_idx_mutex);
1569
1570 }
1571
1572 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1573 {
1574         u32 i, j, k;
1575         u32 mask;
1576
1577         mutex_lock(&adev->grbm_idx_mutex);
1578         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1579                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1580                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1581                         for (k = 0; k < adev->usec_timeout; k++) {
1582                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1583                                         break;
1584                                 udelay(1);
1585                         }
1586                         if (k == adev->usec_timeout) {
1587                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
1588                                                       0xffffffff, 0xffffffff);
1589                                 mutex_unlock(&adev->grbm_idx_mutex);
1590                                 DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
1591                                          i, j);
1592                                 return;
1593                         }
1594                 }
1595         }
1596         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1597         mutex_unlock(&adev->grbm_idx_mutex);
1598
1599         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
1600                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
1601                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
1602                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
1603         for (k = 0; k < adev->usec_timeout; k++) {
1604                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
1605                         break;
1606                 udelay(1);
1607         }
1608 }
1609
1610 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1611                                                bool enable)
1612 {
1613         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
1614
1615         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
1616         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
1617         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
1618         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
1619
1620         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
1621 }
1622
1623 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
1624 {
1625         /* csib */
1626         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
1627                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
1628         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
1629                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1630         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
1631                         adev->gfx.rlc.clear_state_size);
1632 }
1633
1634 static void gfx_v9_0_parse_ind_reg_list(int *register_list_format,
1635                                 int indirect_offset,
1636                                 int list_size,
1637                                 int *unique_indirect_regs,
1638                                 int *unique_indirect_reg_count,
1639                                 int max_indirect_reg_count,
1640                                 int *indirect_start_offsets,
1641                                 int *indirect_start_offsets_count,
1642                                 int max_indirect_start_offsets_count)
1643 {
1644         int idx;
1645         bool new_entry = true;
1646
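             /*
              * The format list is a sequence of blocks terminated by
              * 0xFFFFFFFF markers.  Record where each block starts, and
              * replace the third dword of every entry (an indirect register
              * offset) with its index into unique_indirect_regs[], growing
              * that table as new offsets are encountered.
              */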
1647         for (; indirect_offset < list_size; indirect_offset++) {
1648
1649                 if (new_entry) {
1650                         new_entry = false;
1651                         indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
1652                         *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
1653                         BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count);
1654                 }
1655
1656                 if (register_list_format[indirect_offset] == 0xFFFFFFFF) {
1657                         new_entry = true;
1658                         continue;
1659                 }
1660
1661                 indirect_offset += 2;
1662
1663                 /* look for a matching index */
1664                 for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
1665                         if (unique_indirect_regs[idx] ==
1666                                 register_list_format[indirect_offset])
1667                                 break;
1668                 }
1669
1670                 if (idx >= *unique_indirect_reg_count) {
1671                         unique_indirect_regs[*unique_indirect_reg_count] =
1672                                 register_list_format[indirect_offset];
1673                         idx = *unique_indirect_reg_count;
1674                         *unique_indirect_reg_count = *unique_indirect_reg_count + 1;
1675                         BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count);
1676                 }
1677
1678                 register_list_format[indirect_offset] = idx;
1679         }
1680 }
1681
1682 static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
1683 {
1684         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
1685         int unique_indirect_reg_count = 0;
1686
1687         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
1688         int indirect_start_offsets_count = 0;
1689
1690         int list_size = 0;
1691         int i = 0;
1692         u32 tmp = 0;
1693
1694         u32 *register_list_format =
1695                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
1696         if (!register_list_format)
1697                 return -ENOMEM;
1698         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
1699                 adev->gfx.rlc.reg_list_format_size_bytes);
1700
1701         /* setup unique_indirect_regs array and indirect_start_offsets array */
1702         gfx_v9_0_parse_ind_reg_list(register_list_format,
1703                                 GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH,
1704                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
1705                                 unique_indirect_regs,
1706                                 &unique_indirect_reg_count,
1707                                 ARRAY_SIZE(unique_indirect_regs),
1708                                 indirect_start_offsets,
1709                                 &indirect_start_offsets_count,
1710                                 ARRAY_SIZE(indirect_start_offsets));
1711
1712         /* enable auto inc in case it is disabled */
1713         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
1714         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1715         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
1716
1717         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
1718         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
1719                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
1720         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
1721                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
1722                         adev->gfx.rlc.register_restore[i]);
1723
1724         /* load direct register */
1725         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0);
1726         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
1727                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
1728                         adev->gfx.rlc.register_restore[i]);
1729
1730         /* load indirect register */
1731         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
1732                 adev->gfx.rlc.reg_list_format_start);
1733         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
1734                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
1735                         register_list_format[i]);
1736
1737         /* set save/restore list size */
1738         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
1739         list_size = list_size >> 1;
1740         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
1741                 adev->gfx.rlc.reg_restore_list_size);
1742         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
1743
1744         /* write the starting offsets to RLC scratch ram */
1745         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
1746                 adev->gfx.rlc.starting_offsets_start);
1747         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
1748                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
1749                         indirect_start_offsets[i]);
1750
1751         /* load unique indirect regs */
1752         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
1753                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i,
1754                         unique_indirect_regs[i] & 0x3FFFF);
1755                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i,
1756                         unique_indirect_regs[i] >> 20);
1757         }
1758
1759         kfree(register_list_format);
1760         return 0;
1761 }
1762
1763 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
1764 {
1765         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
1766 }
1767
1768 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
1769                                              bool enable)
1770 {
1771         uint32_t data = 0;
1772         uint32_t default_data = 0;
1773
1774         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
1775         if (enable) {
1776                 /* enable GFXIP control over CGPG */
1777                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
1778                 if (default_data != data)
1779                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
1780
1781                 /* update status */
1782                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
1783                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
1784                 if (default_data != data)
1785                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
1786         } else {
1787                 /* restore GFXIP control over CGPG */
1788                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
1789                 if (default_data != data)
1790                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
1791         }
1792 }
1793
1794 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
1795 {
1796         uint32_t data = 0;
1797
1798         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
1799                               AMD_PG_SUPPORT_GFX_SMG |
1800                               AMD_PG_SUPPORT_GFX_DMG)) {
1801                 /* init IDLE_POLL_COUNT = 0x60 */
1802                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
1803                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
1804                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
1805                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
1806
1807                 /* init RLC PG Delay */
1808                 data = 0;
1809                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
1810                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
1811                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
1812                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
1813                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
1814
1815                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
1816                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
1817                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
1818                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
1819
1820                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
1821                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
1822                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
1823                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
1824
1825                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
1826                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
1827
1828                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
1829                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
1830                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
1831
1832                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
1833         }
1834 }
1835
1836 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
1837                                                 bool enable)
1838 {
1839         uint32_t data = 0;
1840         uint32_t default_data = 0;
1841
1842         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1843         data = REG_SET_FIELD(data, RLC_PG_CNTL,
1844                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
1845                              enable ? 1 : 0);
1846         if (default_data != data)
1847                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1848 }
1849
1850 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
1851                                                 bool enable)
1852 {
1853         uint32_t data = 0;
1854         uint32_t default_data = 0;
1855
1856         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1857         data = REG_SET_FIELD(data, RLC_PG_CNTL,
1858                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
1859                              enable ? 1 : 0);
1860         if (default_data != data)
1861                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1862 }
1863
1864 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
1865                                         bool enable)
1866 {
1867         uint32_t data = 0;
1868         uint32_t default_data = 0;
1869
1870         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1871         data = REG_SET_FIELD(data, RLC_PG_CNTL,
1872                              CP_PG_DISABLE,
1873                              enable ? 0 : 1);
1874         if (default_data != data)
1875                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1876 }
1877
1878 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
1879                                                 bool enable)
1880 {
1881         uint32_t data, default_data;
1882
1883         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1884         data = REG_SET_FIELD(data, RLC_PG_CNTL,
1885                              GFX_POWER_GATING_ENABLE,
1886                              enable ? 1 : 0);
1887         if (default_data != data)
1888                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1889 }
1890
1891 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
1892                                                 bool enable)
1893 {
1894         uint32_t data, default_data;
1895
1896         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1897         data = REG_SET_FIELD(data, RLC_PG_CNTL,
1898                              GFX_PIPELINE_PG_ENABLE,
1899                              enable ? 1 : 0);
1900         if (default_data != data)
1901                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1902
1903         if (!enable)
1904                 /* read any GFX register to wake up GFX */
1905                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
1906 }
1907
1908 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
1909                                                        bool enable)
1910 {
1911         uint32_t data, default_data;
1912
1913         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1914         data = REG_SET_FIELD(data, RLC_PG_CNTL,
1915                              STATIC_PER_CU_PG_ENABLE,
1916                              enable ? 1 : 0);
1917         if (default_data != data)
1918                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1919 }
1920
1921 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
1922                                                 bool enable)
1923 {
1924         uint32_t data, default_data;
1925
1926         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1927         data = REG_SET_FIELD(data, RLC_PG_CNTL,
1928                              DYN_PER_CU_PG_ENABLE,
1929                              enable ? 1 : 0);
1930         if (default_data != data)
1931                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1932 }
1933
1934 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
1935 {
1936         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
1937                               AMD_PG_SUPPORT_GFX_SMG |
1938                               AMD_PG_SUPPORT_GFX_DMG |
1939                               AMD_PG_SUPPORT_CP |
1940                               AMD_PG_SUPPORT_GDS |
1941                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
1942                 gfx_v9_0_init_csb(adev);
1943                 gfx_v9_0_init_rlc_save_restore_list(adev);
1944                 gfx_v9_0_enable_save_restore_machine(adev);
1945
1946                 if (adev->asic_type == CHIP_RAVEN) {
1947                         WREG32(mmRLC_JUMP_TABLE_RESTORE,
1948                                 adev->gfx.rlc.cp_table_gpu_addr >> 8);
1949                         gfx_v9_0_init_gfx_power_gating(adev);
1950
1951                         if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
1952                                 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
1953                                 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
1954                         } else {
1955                                 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
1956                                 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
1957                         }
1958
1959                         if (adev->pg_flags & AMD_PG_SUPPORT_CP)
1960                                 gfx_v9_0_enable_cp_power_gating(adev, true);
1961                         else
1962                                 gfx_v9_0_enable_cp_power_gating(adev, false);
1963                 }
1964         }
1965 }
1966
1967 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
1968 {
1969         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
1970         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
1971         gfx_v9_0_wait_for_rlc_serdes(adev);
1972 }
1973
1974 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
1975 {
1976         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1977         udelay(50);
1978         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
1979         udelay(50);
1980 }
1981
1982 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
1983 {
1984 #ifdef AMDGPU_RLC_DEBUG_RETRY
1985         u32 rlc_ucode_ver;
1986 #endif
1987
1988         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
1989
1990         /* APUs (e.g. carrizo) enable the CP interrupt only after the CP has been initialized */
1991         if (!(adev->flags & AMD_IS_APU))
1992                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
1993
1994         udelay(50);
1995
1996 #ifdef AMDGPU_RLC_DEBUG_RETRY
1997         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
1998         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
1999         if (rlc_ucode_ver == 0x108) {
2000                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2001                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2002                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2003                  * default is 0x9C4 to create a 100us interval */
2004                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2005                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2006                  * to disable the page fault retry interrupts, default is
2007                  * 0x100 (256) */
2008                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2009         }
2010 #endif
2011 }
2012
2013 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2014 {
2015         const struct rlc_firmware_header_v2_0 *hdr;
2016         const __le32 *fw_data;
2017         unsigned i, fw_size;
2018
2019         if (!adev->gfx.rlc_fw)
2020                 return -EINVAL;
2021
2022         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2023         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2024
2025         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2026                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2027         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2028
2029         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2030                         RLCG_UCODE_LOADING_START_ADDRESS);
2031         for (i = 0; i < fw_size; i++)
2032                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2033         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2034
2035         return 0;
2036 }
2037
2038 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2039 {
2040         int r;
2041
2042         if (amdgpu_sriov_vf(adev)) {
2043                 gfx_v9_0_init_csb(adev);
2044                 return 0;
2045         }
2046
2047         gfx_v9_0_rlc_stop(adev);
2048
2049         /* disable CG */
2050         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2051
2052         /* disable PG */
2053         WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0);
2054
2055         gfx_v9_0_rlc_reset(adev);
2056
2057         gfx_v9_0_init_pg(adev);
2058
2059         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2060                 /* legacy rlc firmware loading */
2061                 r = gfx_v9_0_rlc_load_microcode(adev);
2062                 if (r)
2063                         return r;
2064         }
2065
2066         if (adev->asic_type == CHIP_RAVEN) {
2067                 if (amdgpu_lbpw != 0)
2068                         gfx_v9_0_enable_lbpw(adev, true);
2069                 else
2070                         gfx_v9_0_enable_lbpw(adev, false);
2071         }
2072
2073         gfx_v9_0_rlc_start(adev);
2074
2075         return 0;
2076 }
2077
2078 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2079 {
2080         int i;
2081         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2082
2083         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2084         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2085         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2086         if (!enable) {
2087                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2088                         adev->gfx.gfx_ring[i].ready = false;
2089         }
2090         WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
2091         udelay(50);
2092 }
2093
2094 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2095 {
2096         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2097         const struct gfx_firmware_header_v1_0 *ce_hdr;
2098         const struct gfx_firmware_header_v1_0 *me_hdr;
2099         const __le32 *fw_data;
2100         unsigned i, fw_size;
2101
2102         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2103                 return -EINVAL;
2104
2105         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2106                 adev->gfx.pfp_fw->data;
2107         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2108                 adev->gfx.ce_fw->data;
2109         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2110                 adev->gfx.me_fw->data;
2111
2112         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2113         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2114         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2115
2116         gfx_v9_0_cp_gfx_enable(adev, false);
2117
2118         /* PFP */
2119         fw_data = (const __le32 *)
2120                 (adev->gfx.pfp_fw->data +
2121                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2122         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2123         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2124         for (i = 0; i < fw_size; i++)
2125                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2126         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2127
2128         /* CE */
2129         fw_data = (const __le32 *)
2130                 (adev->gfx.ce_fw->data +
2131                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2132         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2133         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2134         for (i = 0; i < fw_size; i++)
2135                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2136         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2137
2138         /* ME */
2139         fw_data = (const __le32 *)
2140                 (adev->gfx.me_fw->data +
2141                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2142         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2143         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2144         for (i = 0; i < fw_size; i++)
2145                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2146         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2147
2148         return 0;
2149 }
2150
2151 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2152 {
2153         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2154         const struct cs_section_def *sect = NULL;
2155         const struct cs_extent_def *ext = NULL;
2156         int r, i, tmp;
2157
2158         /* init the CP */
2159         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2160         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2161
2162         gfx_v9_0_cp_gfx_enable(adev, true);
2163
2164         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2165         if (r) {
2166                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2167                 return r;
2168         }
2169
2170         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2171         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2172
2173         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2174         amdgpu_ring_write(ring, 0x80000000);
2175         amdgpu_ring_write(ring, 0x80000000);
2176
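             /*
              * Replay the SECT_CONTEXT portion of the gfx9 clear state table
              * as SET_CONTEXT_REG writes inside the BEGIN/END_CLEAR_STATE
              * preamble, so the CP can restore these defaults when it later
              * executes the CLEAR_STATE packet emitted below.
              */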
2177         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2178                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2179                         if (sect->id == SECT_CONTEXT) {
2180                                 amdgpu_ring_write(ring,
2181                                        PACKET3(PACKET3_SET_CONTEXT_REG,
2182                                                ext->reg_count));
2183                                 amdgpu_ring_write(ring,
2184                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2185                                 for (i = 0; i < ext->reg_count; i++)
2186                                         amdgpu_ring_write(ring, ext->extent[i]);
2187                         }
2188                 }
2189         }
2190
2191         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2192         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2193
2194         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2195         amdgpu_ring_write(ring, 0);
2196
2197         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2198         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2199         amdgpu_ring_write(ring, 0x8000);
2200         amdgpu_ring_write(ring, 0x8000);
2201
2202         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2203         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2204                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2205         amdgpu_ring_write(ring, tmp);
2206         amdgpu_ring_write(ring, 0);
2207
2208         amdgpu_ring_commit(ring);
2209
2210         return 0;
2211 }
2212
2213 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2214 {
2215         struct amdgpu_ring *ring;
2216         u32 tmp;
2217         u32 rb_bufsz;
2218         u64 rb_addr, rptr_addr, wptr_gpu_addr;
2219
2220         /* Set the write pointer delay */
2221         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2222
2223         /* set the RB to use vmid 0 */
2224         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2225
2226         /* Set ring buffer size */
2227         ring = &adev->gfx.gfx_ring[0];
2228         rb_bufsz = order_base_2(ring->ring_size / 8);
2229         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2230         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2231 #ifdef __BIG_ENDIAN
2232         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2233 #endif
2234         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2235
2236         /* Initialize the ring buffer's write pointers */
2237         ring->wptr = 0;
2238         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2239         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2240
2241         /* set the wb address whether it's enabled or not */
2242         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2243         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2244         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2245
2246         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2247         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2248         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2249
2250         mdelay(1);
2251         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2252
2253         rb_addr = ring->gpu_addr >> 8;
2254         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2255         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2256
2257         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2258         if (ring->use_doorbell) {
2259                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2260                                     DOORBELL_OFFSET, ring->doorbell_index);
2261                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2262                                     DOORBELL_EN, 1);
2263         } else {
2264                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2265         }
2266         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2267
2268         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2269                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
2270         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2271
2272         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2273                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2274
2275
2276         /* start the ring */
2277         gfx_v9_0_cp_gfx_start(adev);
2278         ring->ready = true;
2279
2280         return 0;
2281 }
2282
2283 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2284 {
2285         int i;
2286
2287         if (enable) {
2288                 WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
2289         } else {
2290                 WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
2291                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2292                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2293                         adev->gfx.compute_ring[i].ready = false;
2294                 adev->gfx.kiq.ring.ready = false;
2295         }
2296         udelay(50);
2297 }
2298
2299 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2300 {
2301         const struct gfx_firmware_header_v1_0 *mec_hdr;
2302         const __le32 *fw_data;
2303         unsigned i;
2304         u32 tmp;
2305
2306         if (!adev->gfx.mec_fw)
2307                 return -EINVAL;
2308
2309         gfx_v9_0_cp_compute_enable(adev, false);
2310
2311         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2312         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2313
2314         fw_data = (const __le32 *)
2315                 (adev->gfx.mec_fw->data +
2316                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2317         tmp = 0;
2318         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2319         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2320         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2321
2322         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2323                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2324         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2325                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
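             /*
              * The MEC fetches its main microcode from the firmware BO
              * programmed into CP_CPC_IC_BASE above; only the jump table is
              * streamed through the UCODE_ADDR/DATA ports below.
              */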
2326
2327         /* MEC1 */
2328         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2329                          mec_hdr->jt_offset);
2330         for (i = 0; i < mec_hdr->jt_size; i++)
2331                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2332                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2333
2334         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2335                         adev->gfx.mec_fw_version);
2336         /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2337
2338         return 0;
2339 }
2340
2341 /* KIQ functions */
2342 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2343 {
2344         uint32_t tmp;
2345         struct amdgpu_device *adev = ring->adev;
2346
2347         /* tell RLC which is KIQ queue */
2348         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2349         tmp &= 0xffffff00;
2350         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2351         WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2352         tmp |= 0x80;
2353         WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2354 }
2355
2356 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2357 {
2358         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2359         uint32_t scratch, tmp = 0;
2360         uint64_t queue_mask = 0;
2361         int r, i;
2362
2363         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2364                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2365                         continue;
2366
2367                 /* This situation may be hit in the future if a new HW
2368                  * generation exposes more than 64 queues. If so, the
2369                  * definition of queue_mask needs updating */
2370                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2371                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2372                         break;
2373                 }
2374
2375                 queue_mask |= (1ull << i);
2376         }
2377
2378         r = amdgpu_gfx_scratch_get(adev, &scratch);
2379         if (r) {
2380                 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
2381                 return r;
2382         }
2383         WREG32(scratch, 0xCAFEDEAD);
2384
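             /*
              * Prime the KIQ ring with one SET_RESOURCES packet describing
              * the queues it may own, one MAP_QUEUES packet per compute ring,
              * and a final write of 0xDEADBEEF to the scratch register; the
              * poll loop below uses that value as the completion fence.
              */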
2385         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 11);
2386         if (r) {
2387                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2388                 amdgpu_gfx_scratch_free(adev, scratch);
2389                 return r;
2390         }
2391
2392         /* set resources */
2393         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2394         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2395                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
2396         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
2397         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
2398         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
2399         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
2400         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
2401         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
2402         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2403                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2404                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2405                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2406
2407                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2408                 /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
2409                 amdgpu_ring_write(kiq_ring,
2410                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2411                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2412                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2413                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2414                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2415                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2416                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2417                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2418                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2419                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2420                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2421                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2422                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2423                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2424         }
2425         /* write to scratch for completion */
2426         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2427         amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
2428         amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
2429         amdgpu_ring_commit(kiq_ring);
2430
2431         for (i = 0; i < adev->usec_timeout; i++) {
2432                 tmp = RREG32(scratch);
2433                 if (tmp == 0xDEADBEEF)
2434                         break;
2435                 DRM_UDELAY(1);
2436         }
2437         if (i >= adev->usec_timeout) {
2438                 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
2439                           scratch, tmp);
2440                 r = -EINVAL;
2441         }
2442         amdgpu_gfx_scratch_free(adev, scratch);
2443
2444         return r;
2445 }
2446
2447 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2448 {
2449         struct amdgpu_device *adev = ring->adev;
2450         struct v9_mqd *mqd = ring->mqd_ptr;
2451         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2452         uint32_t tmp;
2453
2454         mqd->header = 0xC0310800;
2455         mqd->compute_pipelinestat_enable = 0x00000001;
2456         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2457         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2458         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2459         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2460         mqd->compute_misc_reserved = 0x00000003;
2461
2462         mqd->dynamic_cu_mask_addr_lo =
2463                 lower_32_bits(ring->mqd_gpu_addr
2464                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2465         mqd->dynamic_cu_mask_addr_hi =
2466                 upper_32_bits(ring->mqd_gpu_addr
2467                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2468
2469         eop_base_addr = ring->eop_gpu_addr >> 8;
2470         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2471         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2472
2473         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2474         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2475         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2476                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2477
2478         mqd->cp_hqd_eop_control = tmp;
2479
2480         /* enable doorbell? */
2481         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2482
2483         if (ring->use_doorbell) {
2484                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2485                                     DOORBELL_OFFSET, ring->doorbell_index);
2486                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2487                                     DOORBELL_EN, 1);
2488                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2489                                     DOORBELL_SOURCE, 0);
2490                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2491                                     DOORBELL_HIT, 0);
2492         } else {
2493                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2494                                          DOORBELL_EN, 0);
2495         }
2496
2497         mqd->cp_hqd_pq_doorbell_control = tmp;
2498
2499         /* disable the queue if it's active */
2500         ring->wptr = 0;
2501         mqd->cp_hqd_dequeue_request = 0;
2502         mqd->cp_hqd_pq_rptr = 0;
2503         mqd->cp_hqd_pq_wptr_lo = 0;
2504         mqd->cp_hqd_pq_wptr_hi = 0;
2505
2506         /* set the pointer to the MQD */
2507         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2508         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2509
2510         /* set MQD vmid to 0 */
2511         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2512         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2513         mqd->cp_mqd_control = tmp;
2514
2515         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2516         hqd_gpu_addr = ring->gpu_addr >> 8;
2517         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2518         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2519
2520         /* set up the HQD, this is similar to CP_RB0_CNTL */
2521         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2522         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2523                             (order_base_2(ring->ring_size / 4) - 1));
2524         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2525                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2526 #ifdef __BIG_ENDIAN
2527         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2528 #endif
2529         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2530         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2531         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2532         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2533         mqd->cp_hqd_pq_control = tmp;
2534
2535         /* set the wb address whether it's enabled or not */
2536         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2537         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2538         mqd->cp_hqd_pq_rptr_report_addr_hi =
2539                 upper_32_bits(wb_gpu_addr) & 0xffff;
2540
2541         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2542         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2543         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2544         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2545
2546         tmp = 0;
2547         /* enable the doorbell if requested */
2548         if (ring->use_doorbell) {
2549                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2550                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2551                                 DOORBELL_OFFSET, ring->doorbell_index);
2552
2553                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2554                                          DOORBELL_EN, 1);
2555                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2556                                          DOORBELL_SOURCE, 0);
2557                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2558                                          DOORBELL_HIT, 0);
2559         }
2560
2561         mqd->cp_hqd_pq_doorbell_control = tmp;
2562
2563         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2564         ring->wptr = 0;
2565         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2566
2567         /* set the vmid for the queue */
2568         mqd->cp_hqd_vmid = 0;
2569
2570         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2571         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2572         mqd->cp_hqd_persistent_state = tmp;
2573
2574         /* set MIN_IB_AVAIL_SIZE */
2575         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2576         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2577         mqd->cp_hqd_ib_control = tmp;
2578
2579         /* activate the queue */
2580         mqd->cp_hqd_active = 1;
2581
2582         return 0;
2583 }
2584
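/*
 * gfx_v9_0_kiq_init_register - program the KIQ hardware queue directly.
 *
 * Writes the values prepared in the MQD into the CP_HQD_* registers of the
 * queue currently selected via soc15_grbm_select(): EOP buffer, MQD base,
 * PQ base/control, write-back addresses and doorbell range, then sets
 * CP_HQD_ACTIVE. Callers hold srbm_mutex around the GRBM selection.
 */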
2585 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2586 {
2587         struct amdgpu_device *adev = ring->adev;
2588         struct v9_mqd *mqd = ring->mqd_ptr;
2589         int j;
2590
2591         /* disable wptr polling */
2592         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2593
2594         WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2595                mqd->cp_hqd_eop_base_addr_lo);
2596         WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2597                mqd->cp_hqd_eop_base_addr_hi);
2598
2599         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2600         WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
2601                mqd->cp_hqd_eop_control);
2602
2603         /* enable doorbell? */
2604         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
2605                mqd->cp_hqd_pq_doorbell_control);
2606
2607         /* disable the queue if it's active */
2608         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
2609                 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
2610                 for (j = 0; j < adev->usec_timeout; j++) {
2611                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
2612                                 break;
2613                         udelay(1);
2614                 }
2615                 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
2616                        mqd->cp_hqd_dequeue_request);
2617                 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
2618                        mqd->cp_hqd_pq_rptr);
2619                 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
2620                        mqd->cp_hqd_pq_wptr_lo);
2621                 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
2622                        mqd->cp_hqd_pq_wptr_hi);
2623         }
2624
2625         /* set the pointer to the MQD */
2626         WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
2627                mqd->cp_mqd_base_addr_lo);
2628         WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
2629                mqd->cp_mqd_base_addr_hi);
2630
2631         /* set MQD vmid to 0 */
2632         WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
2633                mqd->cp_mqd_control);
2634
2635         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2636         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
2637                mqd->cp_hqd_pq_base_lo);
2638         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
2639                mqd->cp_hqd_pq_base_hi);
2640
2641         /* set up the HQD, this is similar to CP_RB0_CNTL */
2642         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
2643                mqd->cp_hqd_pq_control);
2644
2645         /* set the wb address whether it's enabled or not */
2646         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
2647                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
2648         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
2649                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
2650
2651         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2652         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
2653                mqd->cp_hqd_pq_wptr_poll_addr_lo);
2654         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
2655                mqd->cp_hqd_pq_wptr_poll_addr_hi);
2656
2657         /* enable the doorbell if requested */
2658         if (ring->use_doorbell) {
2659                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
2660                                         (AMDGPU_DOORBELL64_KIQ * 2) << 2);
2661                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
2662                                         (AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2);
2663         }
2664
2665         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
2666                mqd->cp_hqd_pq_doorbell_control);
2667
2668         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2669         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
2670                mqd->cp_hqd_pq_wptr_lo);
2671         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
2672                mqd->cp_hqd_pq_wptr_hi);
2673
2674         /* set the vmid for the queue */
2675         WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
2676
2677         WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
2678                mqd->cp_hqd_persistent_state);
2679
2680         /* activate the queue */
2681         WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
2682                mqd->cp_hqd_active);
2683
2684         if (ring->use_doorbell)
2685                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
2686
2687         return 0;
2688 }
2689
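/*
 * gfx_v9_0_kiq_init_queue - (re)initialize the KIQ ring's MQD.
 *
 * On GPU reset the saved MQD backup is restored and only the registers are
 * reprogrammed; otherwise a fresh MQD is built and programmed, and a backup
 * copy is kept in adev->gfx.mec.mqd_backup[] for later resets.
 */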
2690 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
2691 {
2692         struct amdgpu_device *adev = ring->adev;
2693         struct v9_mqd *mqd = ring->mqd_ptr;
2694         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
2695
2696         gfx_v9_0_kiq_setting(ring);
2697
2698         if (adev->in_gpu_reset) { /* for GPU_RESET case */
2699                 /* reset MQD to a clean status */
2700                 if (adev->gfx.mec.mqd_backup[mqd_idx])
2701                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
2702
2703                 /* reset ring buffer */
2704                 ring->wptr = 0;
2705                 amdgpu_ring_clear_ring(ring);
2706
2707                 mutex_lock(&adev->srbm_mutex);
2708                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
2709                 gfx_v9_0_kiq_init_register(ring);
2710                 soc15_grbm_select(adev, 0, 0, 0, 0);
2711                 mutex_unlock(&adev->srbm_mutex);
2712         } else {
2713                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
2714                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
2715                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
2716                 mutex_lock(&adev->srbm_mutex);
2717                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
2718                 gfx_v9_0_mqd_init(ring);
2719                 gfx_v9_0_kiq_init_register(ring);
2720                 soc15_grbm_select(adev, 0, 0, 0, 0);
2721                 mutex_unlock(&adev->srbm_mutex);
2722
2723                 if (adev->gfx.mec.mqd_backup[mqd_idx])
2724                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
2725         }
2726
2727         return 0;
2728 }
2729
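/*
 * gfx_v9_0_kcq_init_queue - (re)initialize a compute ring's MQD.
 *
 * Unlike the KIQ, the KCQ registers are not written here; the queues are
 * mapped later through the KIQ with MAP_QUEUES in gfx_v9_0_kiq_kcq_enable().
 * On reset the MQD is restored from its backup and the ring buffer cleared.
 */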
2730 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
2731 {
2732         struct amdgpu_device *adev = ring->adev;
2733         struct v9_mqd *mqd = ring->mqd_ptr;
2734         int mqd_idx = ring - &adev->gfx.compute_ring[0];
2735
2736         if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
2737                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
2738                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
2739                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
2740                 mutex_lock(&adev->srbm_mutex);
2741                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
2742                 gfx_v9_0_mqd_init(ring);
2743                 soc15_grbm_select(adev, 0, 0, 0, 0);
2744                 mutex_unlock(&adev->srbm_mutex);
2745
2746                 if (adev->gfx.mec.mqd_backup[mqd_idx])
2747                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
2748         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
2749                 /* reset MQD to a clean status */
2750                 if (adev->gfx.mec.mqd_backup[mqd_idx])
2751                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
2752
2753                 /* reset ring buffer */
2754                 ring->wptr = 0;
2755                 amdgpu_ring_clear_ring(ring);
2756         } else {
2757                 amdgpu_ring_clear_ring(ring);
2758         }
2759
2760         return 0;
2761 }
2762
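/*
 * gfx_v9_0_kiq_resume - bring up the KIQ and all compute queues.
 *
 * Enables the MEC, maps and initializes the KIQ MQD first, then each compute
 * ring's MQD, and finally asks the KIQ to map the compute queues via
 * gfx_v9_0_kiq_kcq_enable().
 */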
2763 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
2764 {
2765         struct amdgpu_ring *ring = NULL;
2766         int r = 0, i;
2767
2768         gfx_v9_0_cp_compute_enable(adev, true);
2769
2770         ring = &adev->gfx.kiq.ring;
2771
2772         r = amdgpu_bo_reserve(ring->mqd_obj, false);
2773         if (unlikely(r != 0))
2774                 goto done;
2775
2776         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
2777         if (!r) {
2778                 r = gfx_v9_0_kiq_init_queue(ring);
2779                 amdgpu_bo_kunmap(ring->mqd_obj);
2780                 ring->mqd_ptr = NULL;
2781         }
2782         amdgpu_bo_unreserve(ring->mqd_obj);
2783         if (r)
2784                 goto done;
2785
2786         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2787                 ring = &adev->gfx.compute_ring[i];
2788
2789                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
2790                 if (unlikely(r != 0))
2791                         goto done;
2792                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
2793                 if (!r) {
2794                         r = gfx_v9_0_kcq_init_queue(ring);
2795                         amdgpu_bo_kunmap(ring->mqd_obj);
2796                         ring->mqd_ptr = NULL;
2797                 }
2798                 amdgpu_bo_unreserve(ring->mqd_obj);
2799                 if (r)
2800                         goto done;
2801         }
2802
2803         r = gfx_v9_0_kiq_kcq_enable(adev);
2804 done:
2805         return r;
2806 }
2807
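/*
 * gfx_v9_0_cp_resume - bring up all CP rings after the RLC has resumed.
 *
 * Loads the CP microcode when PSP firmware loading is not in use, resumes
 * the gfx ring, then the KIQ and compute queues, ring-tests each of them and
 * re-enables the GUI idle interrupt.
 */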
2808 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
2809 {
2810         int r, i;
2811         struct amdgpu_ring *ring;
2812
2813         if (!(adev->flags & AMD_IS_APU))
2814                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2815
2816         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2817                 /* legacy firmware loading */
2818                 r = gfx_v9_0_cp_gfx_load_microcode(adev);
2819                 if (r)
2820                         return r;
2821
2822                 r = gfx_v9_0_cp_compute_load_microcode(adev);
2823                 if (r)
2824                         return r;
2825         }
2826
2827         r = gfx_v9_0_cp_gfx_resume(adev);
2828         if (r)
2829                 return r;
2830
2831         r = gfx_v9_0_kiq_resume(adev);
2832         if (r)
2833                 return r;
2834
2835         ring = &adev->gfx.gfx_ring[0];
2836         r = amdgpu_ring_test_ring(ring);
2837         if (r) {
2838                 ring->ready = false;
2839                 return r;
2840         }
2841
2842         ring = &adev->gfx.kiq.ring;
2843         ring->ready = true;
2844         r = amdgpu_ring_test_ring(ring);
2845         if (r)
2846                 ring->ready = false;
2847
2848         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2849                 ring = &adev->gfx.compute_ring[i];
2850
2851                 ring->ready = true;
2852                 r = amdgpu_ring_test_ring(ring);
2853                 if (r)
2854                         ring->ready = false;
2855         }
2856
2857         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2858
2859         return 0;
2860 }
2861
2862 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
2863 {
2864         gfx_v9_0_cp_gfx_enable(adev, enable);
2865         gfx_v9_0_cp_compute_enable(adev, enable);
2866 }
2867
2868 static int gfx_v9_0_hw_init(void *handle)
2869 {
2870         int r;
2871         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2872
2873         gfx_v9_0_init_golden_registers(adev);
2874
2875         gfx_v9_0_gpu_init(adev);
2876
2877         r = gfx_v9_0_rlc_resume(adev);
2878         if (r)
2879                 return r;
2880
2881         r = gfx_v9_0_cp_resume(adev);
2882         if (r)
2883                 return r;
2884
2885         r = gfx_v9_0_ngg_en(adev);
2886         if (r)
2887                 return r;
2888
2889         return r;
2890 }
2891
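/*
 * gfx_v9_0_kcq_disable - unmap one compute queue through the KIQ.
 *
 * Submits an UNMAP_QUEUES packet (RESET_QUEUES action, queue selected by its
 * doorbell offset) on the KIQ ring, then polls a scratch register written at
 * the end of the submission to confirm the KIQ processed it.
 */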
2892 static int gfx_v9_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring)
2893 {
2894         struct amdgpu_device *adev = kiq_ring->adev;
2895         uint32_t scratch, tmp = 0;
2896         int r, i;
2897
2898         r = amdgpu_gfx_scratch_get(adev, &scratch);
2899         if (r) {
2900                 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
2901                 return r;
2902         }
2903         WREG32(scratch, 0xCAFEDEAD);
2904
2905         r = amdgpu_ring_alloc(kiq_ring, 10);
2906         if (r) {
2907                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2908                 amdgpu_gfx_scratch_free(adev, scratch);
2909                 return r;
2910         }
2911
2912         /* unmap queues */
2913         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
2914         amdgpu_ring_write(kiq_ring, /* action: RESET_QUEUES, queue_sel: 0, engine: 0, num_Q: 1 */
2915                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
2916                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
2917                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
2918                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
2919         amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
2920         amdgpu_ring_write(kiq_ring, 0);
2921         amdgpu_ring_write(kiq_ring, 0);
2922         amdgpu_ring_write(kiq_ring, 0);
2923         /* write to scratch for completion */
2924         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2925         amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
2926         amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
2927         amdgpu_ring_commit(kiq_ring);
2928
2929         for (i = 0; i < adev->usec_timeout; i++) {
2930                 tmp = RREG32(scratch);
2931                 if (tmp == 0xDEADBEEF)
2932                         break;
2933                 DRM_UDELAY(1);
2934         }
2935         if (i >= adev->usec_timeout) {
2936                 DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
2937                 r = -EINVAL;
2938         }
2939         amdgpu_gfx_scratch_free(adev, scratch);
2940         return r;
2941 }
2942
2943
2944 static int gfx_v9_0_hw_fini(void *handle)
2945 {
2946         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2947         int i;
2948
2949         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
2950         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
2951
2952         /* disable KCQs so the CPC doesn't touch memory that is no longer valid */
2953         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2954                 gfx_v9_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
2955
2956         if (amdgpu_sriov_vf(adev)) {
2957                 pr_debug("For SRIOV client, nothing to do\n");
2958                 return 0;
2959         }
2960         gfx_v9_0_cp_enable(adev, false);
2961         gfx_v9_0_rlc_stop(adev);
2962
2963         return 0;
2964 }
2965
2966 static int gfx_v9_0_suspend(void *handle)
2967 {
2968         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2969
2970         adev->gfx.in_suspend = true;
2971         return gfx_v9_0_hw_fini(adev);
2972 }
2973
2974 static int gfx_v9_0_resume(void *handle)
2975 {
2976         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2977         int r;
2978
2979         r = gfx_v9_0_hw_init(adev);
2980         adev->gfx.in_suspend = false;
2981         return r;
2982 }
2983
2984 static bool gfx_v9_0_is_idle(void *handle)
2985 {
2986         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2987
2988         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
2989                                 GRBM_STATUS, GUI_ACTIVE))
2990                 return false;
2991         else
2992                 return true;
2993 }
2994
2995 static int gfx_v9_0_wait_for_idle(void *handle)
2996 {
2997         unsigned i;
2998         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2999
3000         for (i = 0; i < adev->usec_timeout; i++) {
3001                 if (gfx_v9_0_is_idle(handle))
3002                         return 0;
3003                 udelay(1);
3004         }
3005         return -ETIMEDOUT;
3006 }
3007
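/*
 * gfx_v9_0_soft_reset - soft reset the GFX block if it appears hung.
 *
 * Inspects the GRBM_STATUS/GRBM_STATUS2 busy bits to decide which SOFT_RESET
 * fields (CP, GFX, RLC) to assert, stops the RLC and CP first, then pulses
 * GRBM_SOFT_RESET with short settling delays around it.
 */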
3008 static int gfx_v9_0_soft_reset(void *handle)
3009 {
3010         u32 grbm_soft_reset = 0;
3011         u32 tmp;
3012         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3013
3014         /* GRBM_STATUS */
3015         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3016         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3017                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3018                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3019                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3020                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3021                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3022                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3023                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3024                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3025                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3026         }
3027
3028         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3029                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3030                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3031         }
3032
3033         /* GRBM_STATUS2 */
3034         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3035         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3036                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3037                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3038
3039
3040         if (grbm_soft_reset) {
3041                 /* stop the rlc */
3042                 gfx_v9_0_rlc_stop(adev);
3043
3044                 /* Disable GFX parsing/prefetching */
3045                 gfx_v9_0_cp_gfx_enable(adev, false);
3046
3047                 /* Disable MEC parsing/prefetching */
3048                 gfx_v9_0_cp_compute_enable(adev, false);
3049
3050                 if (grbm_soft_reset) {
3051                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3052                         tmp |= grbm_soft_reset;
3053                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3054                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3055                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3056
3057                         udelay(50);
3058
3059                         tmp &= ~grbm_soft_reset;
3060                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3061                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3062                 }
3063
3064                 /* Wait a little for things to settle down */
3065                 udelay(50);
3066         }
3067         return 0;
3068 }
3069
3070 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3071 {
3072         uint64_t clock;
3073
3074         mutex_lock(&adev->gfx.gpu_clock_mutex);
3075         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3076         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3077                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3078         mutex_unlock(&adev->gfx.gpu_clock_mutex);
3079         return clock;
3080 }
3081
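/*
 * gfx_v9_0_ring_emit_gds_switch - emit the per-VMID GDS/GWS/OA setup.
 *
 * Converts the byte-based allocations into hardware units (via the
 * AMDGPU_*_SHIFT values) and writes the GDS base/size, GWS and OA mask
 * registers for the given vmid from the ring.
 */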
3082 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3083                                           uint32_t vmid,
3084                                           uint32_t gds_base, uint32_t gds_size,
3085                                           uint32_t gws_base, uint32_t gws_size,
3086                                           uint32_t oa_base, uint32_t oa_size)
3087 {
3088         struct amdgpu_device *adev = ring->adev;
3089
3090         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
3091         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
3092
3093         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
3094         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
3095
3096         oa_base = oa_base >> AMDGPU_OA_SHIFT;
3097         oa_size = oa_size >> AMDGPU_OA_SHIFT;
3098
3099         /* GDS Base */
3100         gfx_v9_0_write_data_to_reg(ring, 0, false,
3101                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3102                                    gds_base);
3103
3104         /* GDS Size */
3105         gfx_v9_0_write_data_to_reg(ring, 0, false,
3106                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3107                                    gds_size);
3108
3109         /* GWS */
3110         gfx_v9_0_write_data_to_reg(ring, 0, false,
3111                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3112                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3113
3114         /* OA */
3115         gfx_v9_0_write_data_to_reg(ring, 0, false,
3116                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3117                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
3118 }
3119
3120 static int gfx_v9_0_early_init(void *handle)
3121 {
3122         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3123
3124         adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3125         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3126         gfx_v9_0_set_ring_funcs(adev);
3127         gfx_v9_0_set_irq_funcs(adev);
3128         gfx_v9_0_set_gds_init(adev);
3129         gfx_v9_0_set_rlc_funcs(adev);
3130
3131         return 0;
3132 }
3133
3134 static int gfx_v9_0_late_init(void *handle)
3135 {
3136         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3137         int r;
3138
3139         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3140         if (r)
3141                 return r;
3142
3143         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3144         if (r)
3145                 return r;
3146
3147         return 0;
3148 }
3149
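/*
 * RLC safe mode: before touching clock-gating related registers the driver
 * asks the RLC to pause by writing RLC_SAFE_MODE and waiting for the CMD bit
 * to clear; gfx_v9_0_exit_rlc_safe_mode() writes the exit command. Both are
 * no-ops when the RLC is disabled or the relevant CG flags are not set.
 */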
3150 static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
3151 {
3152         uint32_t rlc_setting, data;
3153         unsigned i;
3154
3155         if (adev->gfx.rlc.in_safe_mode)
3156                 return;
3157
3158         /* if RLC is not enabled, do nothing */
3159         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3160         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3161                 return;
3162
3163         if (adev->cg_flags &
3164             (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
3165              AMD_CG_SUPPORT_GFX_3D_CGCG)) {
3166                 data = RLC_SAFE_MODE__CMD_MASK;
3167                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3168                 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3169
3170                 /* wait for RLC_SAFE_MODE */
3171                 for (i = 0; i < adev->usec_timeout; i++) {
3172                         if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3173                                 break;
3174                         udelay(1);
3175                 }
3176                 adev->gfx.rlc.in_safe_mode = true;
3177         }
3178 }
3179
3180 static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
3181 {
3182         uint32_t rlc_setting, data;
3183
3184         if (!adev->gfx.rlc.in_safe_mode)
3185                 return;
3186
3187         /* if RLC is not enabled, do nothing */
3188         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3189         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3190                 return;
3191
3192         if (adev->cg_flags &
3193             (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
3194                 /*
3195                  * Try to exit safe mode only if it is already in safe
3196                  * mode.
3197                  */
3198                 data = RLC_SAFE_MODE__CMD_MASK;
3199                 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3200                 adev->gfx.rlc.in_safe_mode = false;
3201         }
3202 }
3203
3204 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3205                                                 bool enable)
3206 {
3207         /* TODO: double check if we need to perform under safe mode */
3208         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
3209
3210         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3211                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3212                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3213                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3214         } else {
3215                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3216                 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3217         }
3218
3219         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
3220 }
3221
3222 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
3223                                                 bool enable)
3224 {
3225         /* TODO: double check if we need to perform under safe mode */
3226         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
3227
3228         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
3229                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
3230         else
3231                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
3232
3233         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
3234                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
3235         else
3236                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
3237
3238         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
3239 }
3240
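/*
 * gfx_v9_0_update_medium_grain_clock_gating - toggle MGCG/MGLS.
 *
 * When enabling, clears the relevant RLC_CGTT_MGCG_OVERRIDE bits and, if
 * supported, turns on RLC and CP memory light sleep; when disabling, the
 * override bits are set again and both light-sleep controls are cleared.
 */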
3241 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
3242                                                       bool enable)
3243 {
3244         uint32_t data, def;
3245
3246         /* It is disabled by HW by default */
3247         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
3248                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
3249                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3250                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
3251                           RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
3252                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
3253                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
3254
3255                 /* only for Vega10 & Raven1 */
3256                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
3257
3258                 if (def != data)
3259                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
3260
3261                 /* MGLS is a global flag to control all MGLS in GFX */
3262                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
3263                         /* 2 - RLC memory Light sleep */
3264                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
3265                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
3266                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
3267                                 if (def != data)
3268                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
3269                         }
3270                         /* 3 - CP memory Light sleep */
3271                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
3272                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
3273                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
3274                                 if (def != data)
3275                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
3276                         }
3277                 }
3278         } else {
3279                 /* 1 - MGCG_OVERRIDE */
3280                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3281                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
3282                          RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
3283                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
3284                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
3285                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
3286                 if (def != data)
3287                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
3288
3289                 /* 2 - disable MGLS in RLC */
3290                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
3291                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
3292                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
3293                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
3294                 }
3295
3296                 /* 3 - disable MGLS in CP */
3297                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
3298                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
3299                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
3300                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
3301                 }
3302         }
3303 }
3304
3305 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
3306                                            bool enable)
3307 {
3308         uint32_t data, def;
3309
3310         adev->gfx.rlc.funcs->enter_safe_mode(adev);
3311
3312         /* Enable 3D CGCG/CGLS */
3313         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
3314                 /* write cmd to clear cgcg/cgls ov */
3315                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3316                 /* unset CGCG override */
3317                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
3318                 /* update CGCG and CGLS override bits */
3319                 if (def != data)
3320                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
3321                 /* enable 3Dcgcg FSM(0x0020003f) */
3322                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
3323                 data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
3324                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
3325                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
3326                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
3327                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
3328                 if (def != data)
3329                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
3330
3331                 /* set IDLE_POLL_COUNT(0x00900100) */
3332                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
3333                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
3334                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3335                 if (def != data)
3336                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
3337         } else {
3338                 /* Disable CGCG/CGLS */
3339                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
3340                 /* disable cgcg, cgls should be disabled */
3341                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
3342                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
3343                 /* disable cgcg and cgls in FSM */
3344                 if (def != data)
3345                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
3346         }
3347
3348         adev->gfx.rlc.funcs->exit_safe_mode(adev);
3349 }
3350
3351 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
3352                                                       bool enable)
3353 {
3354         uint32_t def, data;
3355
3356         adev->gfx.rlc.funcs->enter_safe_mode(adev);
3357
3358         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
3359                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3360                 /* unset CGCG override */
3361                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
3362                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
3363                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
3364                 else
3365                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
3366                 /* update CGCG and CGLS override bits */
3367                 if (def != data)
3368                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
3369
3370                 /* enable cgcg FSM(0x0020003F) */
3371                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
3372                 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
3373                         RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
3374                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
3375                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
3376                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3377                 if (def != data)
3378                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
3379
3380                 /* set IDLE_POLL_COUNT(0x00900100) */
3381                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
3382                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
3383                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3384                 if (def != data)
3385                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
3386         } else {
3387                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
3388                 /* reset CGCG/CGLS bits */
3389                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
3390                 /* disable cgcg and cgls in FSM */
3391                 if (def != data)
3392                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
3393         }
3394
3395         adev->gfx.rlc.funcs->exit_safe_mode(adev);
3396 }
3397
3398 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
3399                                             bool enable)
3400 {
3401         if (enable) {
3402                 /* CGCG/CGLS should be enabled after MGCG/MGLS
3403                  * ===  MGCG + MGLS ===
3404                  */
3405                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
3406                 /* ===  CGCG/CGLS for GFX 3D Only === */
3407                 gfx_v9_0_update_3d_clock_gating(adev, enable);
3408                 /* ===  CGCG + CGLS === */
3409                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
3410         } else {
3411                 /* CGCG/CGLS should be disabled before MGCG/MGLS
3412                  * ===  CGCG + CGLS ===
3413                  */
3414                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
3415                 /* ===  CGCG/CGLS for GFX 3D Only === */
3416                 gfx_v9_0_update_3d_clock_gating(adev, enable);
3417                 /* ===  MGCG + MGLS === */
3418                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
3419         }
3420         return 0;
3421 }
3422
3423 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
3424         .enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode,
3425         .exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode
3426 };
3427
3428 static int gfx_v9_0_set_powergating_state(void *handle,
3429                                           enum amd_powergating_state state)
3430 {
3431         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3432         bool enable = (state == AMD_PG_STATE_GATE);
3433
3434         switch (adev->asic_type) {
3435         case CHIP_RAVEN:
3436                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
3437                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
3438                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
3439                 } else {
3440                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
3441                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
3442                 }
3443
3444                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
3445                         gfx_v9_0_enable_cp_power_gating(adev, true);
3446                 else
3447                         gfx_v9_0_enable_cp_power_gating(adev, false);
3448
3449                 /* update gfx cgpg state */
3450                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
3451
3452                 /* update mgcg state */
3453                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
3454                 break;
3455         default:
3456                 break;
3457         }
3458
3459         return 0;
3460 }
3461
3462 static int gfx_v9_0_set_clockgating_state(void *handle,
3463                                           enum amd_clockgating_state state)
3464 {
3465         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3466
3467         if (amdgpu_sriov_vf(adev))
3468                 return 0;
3469
3470         switch (adev->asic_type) {
3471         case CHIP_VEGA10:
3472         case CHIP_RAVEN:
3473                 gfx_v9_0_update_gfx_clock_gating(adev,
3474                                                  state == AMD_CG_STATE_GATE);
3475                 break;
3476         default:
3477                 break;
3478         }
3479         return 0;
3480 }
3481
3482 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
3483 {
3484         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3485         int data;
3486
3487         if (amdgpu_sriov_vf(adev))
3488                 *flags = 0;
3489
3490         /* AMD_CG_SUPPORT_GFX_MGCG */
3491         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3492         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
3493                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
3494
3495         /* AMD_CG_SUPPORT_GFX_CGCG */
3496         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
3497         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
3498                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
3499
3500         /* AMD_CG_SUPPORT_GFX_CGLS */
3501         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
3502                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
3503
3504         /* AMD_CG_SUPPORT_GFX_RLC_LS */
3505         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
3506         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
3507                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
3508
3509         /* AMD_CG_SUPPORT_GFX_CP_LS */
3510         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
3511         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
3512                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
3513
3514         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
3515         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
3516         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
3517                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
3518
3519         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
3520         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
3521                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
3522 }
3523
3524 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
3525 {
3526         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
3527 }
3528
3529 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
3530 {
3531         struct amdgpu_device *adev = ring->adev;
3532         u64 wptr;
3533
3534         /* XXX check if swapping is necessary on BE */
3535         if (ring->use_doorbell) {
3536                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
3537         } else {
3538                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
3539                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
3540         }
3541
3542         return wptr;
3543 }
3544
3545 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
3546 {
3547         struct amdgpu_device *adev = ring->adev;
3548
3549         if (ring->use_doorbell) {
3550                 /* XXX check if swapping is necessary on BE */
3551                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
3552                 WDOORBELL64(ring->doorbell_index, ring->wptr);
3553         } else {
3554                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3555                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3556         }
3557 }
3558
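/*
 * gfx_v9_0_ring_emit_hdp_flush - emit an HDP flush request on the ring.
 *
 * Picks the per-MEC/pipe (compute) or CP0 (gfx) ref/mask bits from the NBIO
 * HDP flush registers and uses gfx_v9_0_wait_reg_mem() on the flush
 * request/done offsets to trigger the flush and wait for its completion.
 */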
3559 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
3560 {
3561         struct amdgpu_device *adev = ring->adev;
3562         u32 ref_and_mask, reg_mem_engine;
3563         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
3564
3565         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3566                 switch (ring->me) {
3567                 case 1:
3568                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
3569                         break;
3570                 case 2:
3571                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
3572                         break;
3573                 default:
3574                         return;
3575                 }
3576                 reg_mem_engine = 0;
3577         } else {
3578                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
3579                 reg_mem_engine = 1; /* pfp */
3580         }
3581
3582         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
3583                               adev->nbio_funcs->get_hdp_flush_req_offset(adev),
3584                               adev->nbio_funcs->get_hdp_flush_done_offset(adev),
3585                               ref_and_mask, ref_and_mask, 0x20);
3586 }
3587
3588 static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
3589 {
3590         struct amdgpu_device *adev = ring->adev;
3591
3592         gfx_v9_0_write_data_to_reg(ring, 0, true,
3593                                    SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
3594 }
3595
3596 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
3597                                       struct amdgpu_ib *ib,
3598                                       unsigned vmid, bool ctx_switch)
3599 {
3600         u32 header, control = 0;
3601
3602         if (ib->flags & AMDGPU_IB_FLAG_CE)
3603                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3604         else
3605                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3606
3607         control |= ib->length_dw | (vmid << 24);
3608
3609         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
3610                 control |= INDIRECT_BUFFER_PRE_ENB(1);
3611
3612                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
3613                         gfx_v9_0_ring_emit_de_meta(ring);
3614         }
3615
3616         amdgpu_ring_write(ring, header);
3617         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
3618         amdgpu_ring_write(ring,
3619 #ifdef __BIG_ENDIAN
3620                 (2 << 0) |
3621 #endif
3622                 lower_32_bits(ib->gpu_addr));
3623         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
3624         amdgpu_ring_write(ring, control);
3625 }
3626
3627 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
3628                                           struct amdgpu_ib *ib,
3629                                           unsigned vmid, bool ctx_switch)
3630 {
3631         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
3632
3633         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
3634         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
3635         amdgpu_ring_write(ring,
3636 #ifdef __BIG_ENDIAN
3637                                 (2 << 0) |
3638 #endif
3639                                 lower_32_bits(ib->gpu_addr));
3640         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
3641         amdgpu_ring_write(ring, control);
3642 }
3643
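/*
 * gfx_v9_0_ring_emit_fence - emit an end-of-pipe fence on a gfx/compute ring.
 *
 * Uses RELEASE_MEM to flush/write back the TC caches at EOP, write the fence
 * seq (32 or 64 bit depending on the flags) to the given address and raise
 * an interrupt if requested.
 */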
3644 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
3645                                      u64 seq, unsigned flags)
3646 {
3647         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
3648         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
3649
3650         /* RELEASE_MEM - flush caches, send int */
3651         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
3652         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
3653                                  EOP_TC_ACTION_EN |
3654                                  EOP_TC_WB_ACTION_EN |
3655                                  EOP_TC_MD_ACTION_EN |
3656                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3657                                  EVENT_INDEX(5)));
3658         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
3659
3660         /*
3661          * the address should be Qword aligned if 64bit write, Dword
3662          * the address should be Qword aligned for a 64bit write, Dword
3663          * aligned if only sending the 32bit data low (data high is discarded)
3664         if (write64bit)
3665                 BUG_ON(addr & 0x7);
3666         else
3667                 BUG_ON(addr & 0x3);
3668         amdgpu_ring_write(ring, lower_32_bits(addr));
3669         amdgpu_ring_write(ring, upper_32_bits(addr));
3670         amdgpu_ring_write(ring, lower_32_bits(seq));
3671         amdgpu_ring_write(ring, upper_32_bits(seq));
3672         amdgpu_ring_write(ring, 0);
3673 }
3674
3675 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
3676 {
3677         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3678         uint32_t seq = ring->fence_drv.sync_seq;
3679         uint64_t addr = ring->fence_drv.gpu_addr;
3680
3681         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
3682                               lower_32_bits(addr), upper_32_bits(addr),
3683                               seq, 0xffffffff, 4);
3684 }
3685
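/*
 * gfx_v9_0_ring_emit_vm_flush - emit a VM TLB flush for a vmid.
 *
 * Writes the new page directory address into the VM hub's per-vmid PTB
 * registers, kicks the invalidation engine assigned to this ring and waits
 * for its ack bit; on gfx rings a PFP_SYNC_ME follows so the PFP does not
 * fetch with stale translations.
 */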
3686 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
3687                                         unsigned vmid, uint64_t pd_addr)
3688 {
3689         struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
3690         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3691         uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
3692         uint64_t flags = AMDGPU_PTE_VALID;
3693         unsigned eng = ring->vm_inv_eng;
3694
3695         amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
3696         pd_addr |= flags;
3697
3698         gfx_v9_0_write_data_to_reg(ring, usepfp, true,
3699                                    hub->ctx0_ptb_addr_lo32 + (2 * vmid),
3700                                    lower_32_bits(pd_addr));
3701
3702         gfx_v9_0_write_data_to_reg(ring, usepfp, true,
3703                                    hub->ctx0_ptb_addr_hi32 + (2 * vmid),
3704                                    upper_32_bits(pd_addr));
3705
3706         gfx_v9_0_write_data_to_reg(ring, usepfp, true,
3707                                    hub->vm_inv_eng0_req + eng, req);
3708
3709         /* wait for the invalidate to complete */
3710         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
3711                               eng, 0, 1 << vmid, 1 << vmid, 0x20);
3712
3713         /* compute doesn't have PFP */
3714         if (usepfp) {
3715                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
3716                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3717                 amdgpu_ring_write(ring, 0x0);
3718         }
3719 }
3720
3721 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
3722 {
3723         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
3724 }
3725
3726 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
3727 {
3728         u64 wptr;
3729
3730         /* XXX check if swapping is necessary on BE */
3731         if (ring->use_doorbell)
3732                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
3733         else
3734                 BUG();
3735         return wptr;
3736 }
3737
3738 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
3739 {
3740         struct amdgpu_device *adev = ring->adev;
3741
3742         /* XXX check if swapping is necessary on BE */
3743         if (ring->use_doorbell) {
3744                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
3745                 WDOORBELL64(ring->doorbell_index, ring->wptr);
3746         } else {
3747                 BUG(); /* only DOORBELL method supported on gfx9 now */
3748         }
3749 }
3750
3751 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
3752                                          u64 seq, unsigned int flags)
3753 {
3754         struct amdgpu_device *adev = ring->adev;
3755
3756         /* we only allocate 32bit for each seq wb address */
3757         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
3758
3759         /* write fence seq to the "addr" */
3760         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3761         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3762                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
3763         amdgpu_ring_write(ring, lower_32_bits(addr));
3764         amdgpu_ring_write(ring, upper_32_bits(addr));
3765         amdgpu_ring_write(ring, lower_32_bits(seq));
3766
3767         if (flags & AMDGPU_FENCE_FLAG_INT) {
3768                 /* set register to trigger INT */
3769                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3770                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3771                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
3772                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
3773                 amdgpu_ring_write(ring, 0);
3774                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
3775         }
3776 }
3777
3778 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
3779 {
3780         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3781         amdgpu_ring_write(ring, 0);
3782 }
3783
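/*
 * Write a zero-initialized CE metadata block into the CSA via
 * WRITE_DATA so the CP has valid CE state to restore; only emitted
 * under SR-IOV (see gfx_v9_ring_emit_cntxcntl).
 */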
3784 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
3785 {
3786         struct v9_ce_ib_state ce_payload = {0};
3787         uint64_t csa_addr;
3788         int cnt;
3789
3790         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
3791         csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
3792
3793         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
3794         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
3795                                  WRITE_DATA_DST_SEL(8) |
3796                                  WR_CONFIRM) |
3797                                  WRITE_DATA_CACHE_POLICY(0));
3798         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
3799         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
3800         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
3801 }
3802
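/*
 * Write a zero-initialized DE metadata block into the CSA via
 * WRITE_DATA, with its GDS backup address pointing at csa_addr + 4096.
 */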
3803 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
3804 {
3805         struct v9_de_ib_state de_payload = {0};
3806         uint64_t csa_addr, gds_addr;
3807         int cnt;
3808
3809         csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
3810         gds_addr = csa_addr + 4096;
3811         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
3812         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
3813
3814         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
3815         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
3816         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
3817                                  WRITE_DATA_DST_SEL(8) |
3818                                  WR_CONFIRM) |
3819                                  WRITE_DATA_CACHE_POLICY(0));
3820         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
3821         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
3822         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
3823 }
3824
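/*
 * Emit a FRAME_CONTROL packet marking the begin or the end of a
 * protected (TMZ) frame.
 */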
3825 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
3826 {
3827         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
3828         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0: frame begin, 1: frame end */
3829 }
3830
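/*
 * Emit CONTEXT_CONTROL: under SR-IOV the CE metadata is refreshed
 * first, then the packet selects which state blocks (global/uconfig
 * registers, per-context state, SH registers, CE RAM) the CP should
 * load, depending on whether a context switch and/or a preamble IB is
 * present.
 */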
3831 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
3832 {
3833         uint32_t dw2 = 0;
3834
3835         if (amdgpu_sriov_vf(ring->adev))
3836                 gfx_v9_0_ring_emit_ce_meta(ring);
3837
3838         gfx_v9_0_ring_emit_tmz(ring, true);
3839
3840         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
3841         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
3842                 /* set load_global_config & load_global_uconfig */
3843                 dw2 |= 0x8001;
3844                 /* set load_cs_sh_regs */
3845                 dw2 |= 0x01000000;
3846                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
3847                 dw2 |= 0x10002;
3848
3849                 /* set load_ce_ram if a preamble is present */
3850                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
3851                         dw2 |= 0x10000000;
3852         } else {
3853                 /* still load_ce_ram if this is the first time a preamble is
3854                  * presented, even though no context switch happens.
3855                  */
3856                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
3857                         dw2 |= 0x10000000;
3858         }
3859
3860         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3861         amdgpu_ring_write(ring, dw2);
3862         amdgpu_ring_write(ring, 0);
3863 }
3864
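/*
 * Emit a COND_EXEC packet with a dummy DW count and return the ring
 * offset of that dummy value; gfx_v9_0_ring_emit_patch_cond_exec()
 * patches it later with the real number of DWs to skip when the value
 * at cond_exe_gpu_addr is zero.
 */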
3865 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
3866 {
3867         unsigned ret;
3868         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
3869         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
3870         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
3871         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
3872         ret = ring->wptr & ring->buf_mask;
3873         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
3874         return ret;
3875 }
3876
3877 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
3878 {
3879         unsigned cur;
3880         BUG_ON(offset > ring->buf_mask);
3881         BUG_ON(ring->ring[offset] != 0x55aa55aa);
3882
3883         cur = (ring->wptr & ring->buf_mask) - 1;
3884         if (likely(cur > offset))
3885                 ring->ring[offset] = cur - offset;
3886         else
3887                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
3888 }
3889
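/*
 * Read a register from the ring (hooked up on the KIQ ring): COPY_DATA
 * the register value into the writeback slot reserved for virtualized
 * register reads (adev->virt.reg_val_offs).
 */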
3890 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
3891 {
3892         struct amdgpu_device *adev = ring->adev;
3893
3894         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
3895         amdgpu_ring_write(ring, 0 |     /* src: register*/
3896                                 (5 << 8) |      /* dst: memory */
3897                                 (1 << 20));     /* write confirm */
3898         amdgpu_ring_write(ring, reg);
3899         amdgpu_ring_write(ring, 0);
3900         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
3901                                 adev->virt.reg_val_offs * 4));
3902         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
3903                                 adev->virt.reg_val_offs * 4));
3904 }
3905
3906 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
3907                                   uint32_t val)
3908 {
3909         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3910         amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
3911         amdgpu_ring_write(ring, reg);
3912         amdgpu_ring_write(ring, 0);
3913         amdgpu_ring_write(ring, val);
3914 }
3915
3916 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
3917                                                  enum amdgpu_interrupt_state state)
3918 {
3919         switch (state) {
3920         case AMDGPU_IRQ_STATE_DISABLE:
3921         case AMDGPU_IRQ_STATE_ENABLE:
3922                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
3923                                TIME_STAMP_INT_ENABLE,
3924                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
3925                 break;
3926         default:
3927                 break;
3928         }
3929 }
3930
3931 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
3932                                                      int me, int pipe,
3933                                                      enum amdgpu_interrupt_state state)
3934 {
3935         u32 mec_int_cntl, mec_int_cntl_reg;
3936
3937         /*
3938          * amdgpu controls only the first MEC. That's why this function only
3939          * handles the setting of interrupts for this specific MEC. All other
3940          * pipes' interrupts are set by amdkfd.
3941          */
3942
3943         if (me == 1) {
3944                 switch (pipe) {
3945                 case 0:
3946                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
3947                         break;
3948                 case 1:
3949                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
3950                         break;
3951                 case 2:
3952                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
3953                         break;
3954                 case 3:
3955                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
3956                         break;
3957                 default:
3958                         DRM_DEBUG("invalid pipe %d\n", pipe);
3959                         return;
3960                 }
3961         } else {
3962                 DRM_DEBUG("invalid me %d\n", me);
3963                 return;
3964         }
3965
3966         switch (state) {
3967         case AMDGPU_IRQ_STATE_DISABLE:
3968                 mec_int_cntl = RREG32(mec_int_cntl_reg);
3969                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
3970                                              TIME_STAMP_INT_ENABLE, 0);
3971                 WREG32(mec_int_cntl_reg, mec_int_cntl);
3972                 break;
3973         case AMDGPU_IRQ_STATE_ENABLE:
3974                 mec_int_cntl = RREG32(mec_int_cntl_reg);
3975                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
3976                                              TIME_STAMP_INT_ENABLE, 1);
3977                 WREG32(mec_int_cntl_reg, mec_int_cntl);
3978                 break;
3979         default:
3980                 break;
3981         }
3982 }
3983
3984 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
3985                                              struct amdgpu_irq_src *source,
3986                                              unsigned type,
3987                                              enum amdgpu_interrupt_state state)
3988 {
3989         switch (state) {
3990         case AMDGPU_IRQ_STATE_DISABLE:
3991         case AMDGPU_IRQ_STATE_ENABLE:
3992                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
3993                                PRIV_REG_INT_ENABLE,
3994                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
3995                 break;
3996         default:
3997                 break;
3998         }
3999
4000         return 0;
4001 }
4002
4003 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4004                                               struct amdgpu_irq_src *source,
4005                                               unsigned type,
4006                                               enum amdgpu_interrupt_state state)
4007 {
4008         switch (state) {
4009         case AMDGPU_IRQ_STATE_DISABLE:
4010         case AMDGPU_IRQ_STATE_ENABLE:
4011                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4012                                PRIV_INSTR_INT_ENABLE,
4013                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
4014         default:
4015                 break;
4016         }
4017
4018         return 0;
4019 }
4020
4021 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4022                                             struct amdgpu_irq_src *src,
4023                                             unsigned type,
4024                                             enum amdgpu_interrupt_state state)
4025 {
4026         switch (type) {
4027         case AMDGPU_CP_IRQ_GFX_EOP:
4028                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
4029                 break;
4030         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4031                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4032                 break;
4033         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4034                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4035                 break;
4036         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4037                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4038                 break;
4039         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4040                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4041                 break;
4042         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4043                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4044                 break;
4045         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4046                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4047                 break;
4048         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4049                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4050                 break;
4051         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4052                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4053                 break;
4054         default:
4055                 break;
4056         }
4057         return 0;
4058 }
4059
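/*
 * CP EOP interrupt handler: the IV ring_id encodes me (bits 3:2),
 * pipe (bits 1:0) and queue (bits 6:4); process fences on the gfx
 * ring for ME 0 and on the matching compute ring for MEC1/MEC2.
 */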
4060 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
4061                             struct amdgpu_irq_src *source,
4062                             struct amdgpu_iv_entry *entry)
4063 {
4064         int i;
4065         u8 me_id, pipe_id, queue_id;
4066         struct amdgpu_ring *ring;
4067
4068         DRM_DEBUG("IH: CP EOP\n");
4069         me_id = (entry->ring_id & 0x0c) >> 2;
4070         pipe_id = (entry->ring_id & 0x03) >> 0;
4071         queue_id = (entry->ring_id & 0x70) >> 4;
4072
4073         switch (me_id) {
4074         case 0:
4075                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4076                 break;
4077         case 1:
4078         case 2:
4079                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4080                         ring = &adev->gfx.compute_ring[i];
4081                         /* Per-queue interrupt is supported for MEC starting from VI.
4082                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
4083                           */
4084                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
4085                                 amdgpu_fence_process(ring);
4086                 }
4087                 break;
4088         }
4089         return 0;
4090 }
4091
4092 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
4093                                  struct amdgpu_irq_src *source,
4094                                  struct amdgpu_iv_entry *entry)
4095 {
4096         DRM_ERROR("Illegal register access in command stream\n");
4097         schedule_work(&adev->reset_work);
4098         return 0;
4099 }
4100
4101 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
4102                                   struct amdgpu_irq_src *source,
4103                                   struct amdgpu_iv_entry *entry)
4104 {
4105         DRM_ERROR("Illegal instruction in command stream\n");
4106         schedule_work(&adev->reset_work);
4107         return 0;
4108 }
4109
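/*
 * Enable or disable the KIQ GENERIC2 interrupt: toggle
 * GENERIC2_INT_ENABLE both globally in CPC_INT_CNTL and in the
 * per-pipe CP_ME[12]_PIPE<n>_INT_CNTL register of the KIQ's pipe.
 */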
4110 static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
4111                                             struct amdgpu_irq_src *src,
4112                                             unsigned int type,
4113                                             enum amdgpu_interrupt_state state)
4114 {
4115         uint32_t tmp, target;
4116         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
4117
4118         if (ring->me == 1)
4119                 target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4120         else
4121                 target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL);
4122         target += ring->pipe;
4123
4124         switch (type) {
4125         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
4126                 if (state == AMDGPU_IRQ_STATE_DISABLE) {
4127                         tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
4128                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
4129                                                  GENERIC2_INT_ENABLE, 0);
4130                         WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
4131
4132                         tmp = RREG32(target);
4133                         tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
4134                                                  GENERIC2_INT_ENABLE, 0);
4135                         WREG32(target, tmp);
4136                 } else {
4137                         tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
4138                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
4139                                                  GENERIC2_INT_ENABLE, 1);
4140                         WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
4141
4142                         tmp = RREG32(target);
4143                         tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
4144                                                  GENERIC2_INT_ENABLE, 1);
4145                         WREG32(target, tmp);
4146                 }
4147                 break;
4148         default:
4149                 BUG(); /* kiq only supports GENERIC2_INT now */
4150                 break;
4151         }
4152         return 0;
4153 }
4154
4155 static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev,
4156                             struct amdgpu_irq_src *source,
4157                             struct amdgpu_iv_entry *entry)
4158 {
4159         u8 me_id, pipe_id, queue_id;
4160         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
4161
4162         me_id = (entry->ring_id & 0x0c) >> 2;
4163         pipe_id = (entry->ring_id & 0x03) >> 0;
4164         queue_id = (entry->ring_id & 0x70) >> 4;
4165         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
4166                    me_id, pipe_id, queue_id);
4167
4168         amdgpu_fence_process(ring);
4169         return 0;
4170 }
4171
4172 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
4173         .name = "gfx_v9_0",
4174         .early_init = gfx_v9_0_early_init,
4175         .late_init = gfx_v9_0_late_init,
4176         .sw_init = gfx_v9_0_sw_init,
4177         .sw_fini = gfx_v9_0_sw_fini,
4178         .hw_init = gfx_v9_0_hw_init,
4179         .hw_fini = gfx_v9_0_hw_fini,
4180         .suspend = gfx_v9_0_suspend,
4181         .resume = gfx_v9_0_resume,
4182         .is_idle = gfx_v9_0_is_idle,
4183         .wait_for_idle = gfx_v9_0_wait_for_idle,
4184         .soft_reset = gfx_v9_0_soft_reset,
4185         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
4186         .set_powergating_state = gfx_v9_0_set_powergating_state,
4187         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
4188 };
4189
4190 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
4191         .type = AMDGPU_RING_TYPE_GFX,
4192         .align_mask = 0xff,
4193         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
4194         .support_64bit_ptrs = true,
4195         .vmhub = AMDGPU_GFXHUB,
4196         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
4197         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
4198         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
4199         .emit_frame_size = /* 242 DWs maximum in total with 16 IBs */
4200                 5 +  /* COND_EXEC */
4201                 7 +  /* PIPELINE_SYNC */
4202                 24 + /* VM_FLUSH */
4203                 8 +  /* FENCE for VM_FLUSH */
4204                 20 + /* GDS switch */
4205                 4 + /* double SWITCH_BUFFER,
4206                        the first COND_EXEC jumps to the place just
4207                        prior to this double SWITCH_BUFFER */
4208                 5 + /* COND_EXEC */
4209                 7 + /* HDP_flush */
4210                 4 + /* VGT_flush */
4211                 14 + /* CE_META */
4212                 31 + /* DE_META */
4213                 3 + /* CNTX_CTRL */
4214                 5 + /* HDP_INVL */
4215                 8 + 8 + /* FENCE x2 */
4216                 2, /* SWITCH_BUFFER */
4217         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
4218         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
4219         .emit_fence = gfx_v9_0_ring_emit_fence,
4220         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
4221         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
4222         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
4223         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
4224         .emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
4225         .test_ring = gfx_v9_0_ring_test_ring,
4226         .test_ib = gfx_v9_0_ring_test_ib,
4227         .insert_nop = amdgpu_ring_insert_nop,
4228         .pad_ib = amdgpu_ring_generic_pad_ib,
4229         .emit_switch_buffer = gfx_v9_ring_emit_sb,
4230         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
4231         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
4232         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
4233         .emit_tmz = gfx_v9_0_ring_emit_tmz,
4234 };
4235
4236 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
4237         .type = AMDGPU_RING_TYPE_COMPUTE,
4238         .align_mask = 0xff,
4239         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
4240         .support_64bit_ptrs = true,
4241         .vmhub = AMDGPU_GFXHUB,
4242         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
4243         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
4244         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
4245         .emit_frame_size =
4246                 20 + /* gfx_v9_0_ring_emit_gds_switch */
4247                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
4248                 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
4249                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
4250                 24 + /* gfx_v9_0_ring_emit_vm_flush */
4251                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
4252         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
4253         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
4254         .emit_fence = gfx_v9_0_ring_emit_fence,
4255         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
4256         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
4257         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
4258         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
4259         .emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
4260         .test_ring = gfx_v9_0_ring_test_ring,
4261         .test_ib = gfx_v9_0_ring_test_ib,
4262         .insert_nop = amdgpu_ring_insert_nop,
4263         .pad_ib = amdgpu_ring_generic_pad_ib,
4264 };
4265
4266 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
4267         .type = AMDGPU_RING_TYPE_KIQ,
4268         .align_mask = 0xff,
4269         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
4270         .support_64bit_ptrs = true,
4271         .vmhub = AMDGPU_GFXHUB,
4272         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
4273         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
4274         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
4275         .emit_frame_size =
4276                 20 + /* gfx_v9_0_ring_emit_gds_switch */
4277                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
4278                 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
4279                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
4280                 24 + /* gfx_v9_0_ring_emit_vm_flush */
4281                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
4282         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
4283         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
4284         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
4285         .test_ring = gfx_v9_0_ring_test_ring,
4286         .test_ib = gfx_v9_0_ring_test_ib,
4287         .insert_nop = amdgpu_ring_insert_nop,
4288         .pad_ib = amdgpu_ring_generic_pad_ib,
4289         .emit_rreg = gfx_v9_0_ring_emit_rreg,
4290         .emit_wreg = gfx_v9_0_ring_emit_wreg,
4291 };
4292
4293 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
4294 {
4295         int i;
4296
4297         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
4298
4299         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4300                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
4301
4302         for (i = 0; i < adev->gfx.num_compute_rings; i++)
4303                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
4304 }
4305
4306 static const struct amdgpu_irq_src_funcs gfx_v9_0_kiq_irq_funcs = {
4307         .set = gfx_v9_0_kiq_set_interrupt_state,
4308         .process = gfx_v9_0_kiq_irq,
4309 };
4310
4311 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
4312         .set = gfx_v9_0_set_eop_interrupt_state,
4313         .process = gfx_v9_0_eop_irq,
4314 };
4315
4316 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
4317         .set = gfx_v9_0_set_priv_reg_fault_state,
4318         .process = gfx_v9_0_priv_reg_irq,
4319 };
4320
4321 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
4322         .set = gfx_v9_0_set_priv_inst_fault_state,
4323         .process = gfx_v9_0_priv_inst_irq,
4324 };
4325
4326 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
4327 {
4328         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
4329         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
4330
4331         adev->gfx.priv_reg_irq.num_types = 1;
4332         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
4333
4334         adev->gfx.priv_inst_irq.num_types = 1;
4335         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
4336
4337         adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
4338         adev->gfx.kiq.irq.funcs = &gfx_v9_0_kiq_irq_funcs;
4339 }
4340
4341 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
4342 {
4343         switch (adev->asic_type) {
4344         case CHIP_VEGA10:
4345         case CHIP_RAVEN:
4346                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
4347                 break;
4348         default:
4349                 break;
4350         }
4351 }
4352
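/*
 * Initialize the GDS info for this ASIC: read the total GDS memory
 * size from GDS_VMID0_SIZE and pick gfx/CS partition sizes for GDS
 * memory, GWS and OA accordingly.
 */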
4353 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
4354 {
4355         /* init asic gds info */
4356         adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
4357         adev->gds.gws.total_size = 64;
4358         adev->gds.oa.total_size = 16;
4359
4360         if (adev->gds.mem.total_size == 64 * 1024) {
4361                 adev->gds.mem.gfx_partition_size = 4096;
4362                 adev->gds.mem.cs_partition_size = 4096;
4363
4364                 adev->gds.gws.gfx_partition_size = 4;
4365                 adev->gds.gws.cs_partition_size = 4;
4366
4367                 adev->gds.oa.gfx_partition_size = 4;
4368                 adev->gds.oa.cs_partition_size = 1;
4369         } else {
4370                 adev->gds.mem.gfx_partition_size = 1024;
4371                 adev->gds.mem.cs_partition_size = 1024;
4372
4373                 adev->gds.gws.gfx_partition_size = 16;
4374                 adev->gds.gws.cs_partition_size = 16;
4375
4376                 adev->gds.oa.gfx_partition_size = 4;
4377                 adev->gds.oa.cs_partition_size = 4;
4378         }
4379 }
4380
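/*
 * Program the user requested CU disable mask for the currently
 * selected SE/SH into GC_USER_SHADER_ARRAY_CONFIG.
 */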
4381 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
4382                                                  u32 bitmap)
4383 {
4384         u32 data;
4385
4386         if (!bitmap)
4387                 return;
4388
4389         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
4390         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
4391
4392         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
4393 }
4394
4395 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
4396 {
4397         u32 data, mask;
4398
4399         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
4400         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
4401
4402         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
4403         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
4404
4405         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
4406
4407         return (~data) & mask;
4408 }
4409
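/*
 * Build the CU info for the ASIC: walk all SEs/SHs, apply the user CU
 * disable masks, record the per-SH active and always-on CU bitmaps and
 * count the total number of active CUs.
 */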
4410 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
4411                                  struct amdgpu_cu_info *cu_info)
4412 {
4413         int i, j, k, counter, active_cu_number = 0;
4414         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
4415         unsigned disable_masks[4 * 2];
4416
4417         if (!adev || !cu_info)
4418                 return -EINVAL;
4419
4420         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
4421
4422         mutex_lock(&adev->grbm_idx_mutex);
4423         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
4424                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
4425                         mask = 1;
4426                         ao_bitmap = 0;
4427                         counter = 0;
4428                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
4429                         if (i < 4 && j < 2)
4430                                 gfx_v9_0_set_user_cu_inactive_bitmap(
4431                                         adev, disable_masks[i * 2 + j]);
4432                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
4433                         cu_info->bitmap[i][j] = bitmap;
4434
4435                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
4436                                 if (bitmap & mask) {
4437                                         if (counter < adev->gfx.config.max_cu_per_sh)
4438                                                 ao_bitmap |= mask;
4439                                         counter++;
4440                                 }
4441                                 mask <<= 1;
4442                         }
4443                         active_cu_number += counter;
4444                         if (i < 2 && j < 2)
4445                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
4446                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
4447                 }
4448         }
4449         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
4450         mutex_unlock(&adev->grbm_idx_mutex);
4451
4452         cu_info->number = active_cu_number;
4453         cu_info->ao_cu_mask = ao_cu_mask;
4454
4455         return 0;
4456 }
4457
4458 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
4459 {
4460         .type = AMD_IP_BLOCK_TYPE_GFX,
4461         .major = 9,
4462         .minor = 0,
4463         .rev = 0,
4464         .funcs = &gfx_v9_0_ip_funcs,
4465 };