drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 #include "vega10_enum.h"
40 #include "hdp/hdp_4_0_offset.h"
41
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48
49 #include "amdgpu_ras.h"
50
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55
56 #define mmPWR_MISC_CNTL_STATUS                                  0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L
62
63 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
64 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
65 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
66 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
69
70 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
71 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
72 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
73 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
76
77 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
78 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
79 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
80 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
83
84 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
85 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
86 MODULE_FIRMWARE("amdgpu/raven_me.bin");
87 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
88 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
89 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
90
91 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
98
99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
101 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
106
107 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
108 {
109         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
110         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
111         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
112         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
113         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
114         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
115         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
116         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
117         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
118         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
119         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
120         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
121         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
122         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
123         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
124         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
125         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
126         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
127         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
128         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
129 };
130
131 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
132 {
133         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
134         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
135         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
136         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
137         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
138         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
139         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
140         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
141         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
142         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
143         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
144         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
145         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
146         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
147         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
148         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
149         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
150         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
151 };
152
153 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
154 {
155         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
156         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
157         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
158         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
159         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
160         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
161         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
162         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
163         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
164         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
165         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
166 };
167
168 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
169 {
170         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
171         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
172         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
173         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
174         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
175         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
176         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
177         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
178         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
179         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
180         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
181         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
182         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
183         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
184         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
185         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
186         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
187         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
188         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
189         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
190         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
191         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
192         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
193         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
194 };
195
196 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
197 {
198         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
199         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
200         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
201         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
202         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
203         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
204         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
205 };
206
207 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
208 {
209         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
210         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
211         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
212         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
213         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
214         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
215         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
216         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
217         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
218         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
219         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
220         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
221         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
222         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
223         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
224         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
225         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
226         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
227         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
228 };
229
230 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
231 {
232         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
233         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
234         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
235 };
236
237 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
238 {
239         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
240         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
241         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
242         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
243         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
244         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
245         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
246         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
247         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
248         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
249         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
250         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
251         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
252         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
253         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
254         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
255 };
256
257 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
258 {
259         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
260         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
261         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
262         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
263         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
264         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
265         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
266         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
267         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
268         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
269         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
270         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
271         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
272 };
273
274 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
275 {
276         mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
277         mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
278         mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
279         mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
280         mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
281         mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
282         mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
283         mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
284 };
285
286 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
287 {
288         mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
289         mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
290         mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
291         mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
292         mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
293         mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
294         mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
295         mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
296 };
297
298 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
299 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
300 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
301 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
302
303 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
304 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
305 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
306 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
307 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
308                                  struct amdgpu_cu_info *cu_info);
309 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
310 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
311 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
312 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
313
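/* Program the ASIC-specific "golden" register settings for the GC block,
 * followed by the settings common to all gfx v9 parts.
 */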
314 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
315 {
316         switch (adev->asic_type) {
317         case CHIP_VEGA10:
318                 if (!amdgpu_virt_support_skip_setting(adev)) {
319                         soc15_program_register_sequence(adev,
320                                                          golden_settings_gc_9_0,
321                                                          ARRAY_SIZE(golden_settings_gc_9_0));
322                         soc15_program_register_sequence(adev,
323                                                          golden_settings_gc_9_0_vg10,
324                                                          ARRAY_SIZE(golden_settings_gc_9_0_vg10));
325                 }
326                 break;
327         case CHIP_VEGA12:
328                 soc15_program_register_sequence(adev,
329                                                 golden_settings_gc_9_2_1,
330                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
331                 soc15_program_register_sequence(adev,
332                                                 golden_settings_gc_9_2_1_vg12,
333                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
334                 break;
335         case CHIP_VEGA20:
336                 soc15_program_register_sequence(adev,
337                                                 golden_settings_gc_9_0,
338                                                 ARRAY_SIZE(golden_settings_gc_9_0));
339                 soc15_program_register_sequence(adev,
340                                                 golden_settings_gc_9_0_vg20,
341                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
342                 break;
343         case CHIP_RAVEN:
344                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
345                                                 ARRAY_SIZE(golden_settings_gc_9_1));
346                 if (adev->rev_id >= 8)
347                         soc15_program_register_sequence(adev,
348                                                         golden_settings_gc_9_1_rv2,
349                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
350                 else
351                         soc15_program_register_sequence(adev,
352                                                         golden_settings_gc_9_1_rv1,
353                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
354                 break;
355         default:
356                 break;
357         }
358
359         soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
360                                         (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
361 }
362
363 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
364 {
365         adev->gfx.scratch.num_reg = 8;
366         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
367         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
368 }
369
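/* Emit a PACKET3_WRITE_DATA packet that writes @val to register @reg,
 * optionally requesting a write confirmation.
 */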
370 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
371                                        bool wc, uint32_t reg, uint32_t val)
372 {
373         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
374         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
375                                 WRITE_DATA_DST_SEL(0) |
376                                 (wc ? WR_CONFIRM : 0));
377         amdgpu_ring_write(ring, reg);
378         amdgpu_ring_write(ring, 0);
379         amdgpu_ring_write(ring, val);
380 }
381
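/* Emit a PACKET3_WAIT_REG_MEM packet that polls a register or memory
 * location until (value & @mask) == @ref.
 */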
382 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
383                                   int mem_space, int opt, uint32_t addr0,
384                                   uint32_t addr1, uint32_t ref, uint32_t mask,
385                                   uint32_t inv)
386 {
387         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
388         amdgpu_ring_write(ring,
389                                  /* memory (1) or register (0) */
390                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
391                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
392                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
393                                  WAIT_REG_MEM_ENGINE(eng_sel)));
394
395         if (mem_space)
396                 BUG_ON(addr0 & 0x3); /* Dword align */
397         amdgpu_ring_write(ring, addr0);
398         amdgpu_ring_write(ring, addr1);
399         amdgpu_ring_write(ring, ref);
400         amdgpu_ring_write(ring, mask);
401         amdgpu_ring_write(ring, inv); /* poll interval */
402 }
403
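/* Basic ring test: write 0xDEADBEEF to a scratch register through the
 * ring and poll until the value reads back or the timeout expires.
 */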
404 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
405 {
406         struct amdgpu_device *adev = ring->adev;
407         uint32_t scratch;
408         uint32_t tmp = 0;
409         unsigned i;
410         int r;
411
412         r = amdgpu_gfx_scratch_get(adev, &scratch);
413         if (r)
414                 return r;
415
416         WREG32(scratch, 0xCAFEDEAD);
417         r = amdgpu_ring_alloc(ring, 3);
418         if (r)
419                 goto error_free_scratch;
420
421         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
422         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
423         amdgpu_ring_write(ring, 0xDEADBEEF);
424         amdgpu_ring_commit(ring);
425
426         for (i = 0; i < adev->usec_timeout; i++) {
427                 tmp = RREG32(scratch);
428                 if (tmp == 0xDEADBEEF)
429                         break;
430                 udelay(1);
431         }
432
433         if (i >= adev->usec_timeout)
434                 r = -ETIMEDOUT;
435
436 error_free_scratch:
437         amdgpu_gfx_scratch_free(adev, scratch);
438         return r;
439 }
440
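/* IB test: submit an indirect buffer that writes 0xDEADBEEF to a
 * writeback slot, then wait on the fence and verify the value landed.
 */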
441 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
442 {
443         struct amdgpu_device *adev = ring->adev;
444         struct amdgpu_ib ib;
445         struct dma_fence *f = NULL;
446
447         unsigned index;
448         uint64_t gpu_addr;
449         uint32_t tmp;
450         long r;
451
452         r = amdgpu_device_wb_get(adev, &index);
453         if (r)
454                 return r;
455
456         gpu_addr = adev->wb.gpu_addr + (index * 4);
457         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
458         memset(&ib, 0, sizeof(ib));
459         r = amdgpu_ib_get(adev, NULL, 16, &ib);
460         if (r)
461                 goto err1;
462
463         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
464         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
465         ib.ptr[2] = lower_32_bits(gpu_addr);
466         ib.ptr[3] = upper_32_bits(gpu_addr);
467         ib.ptr[4] = 0xDEADBEEF;
468         ib.length_dw = 5;
469
470         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
471         if (r)
472                 goto err2;
473
474         r = dma_fence_wait_timeout(f, false, timeout);
475         if (r == 0) {
476                 r = -ETIMEDOUT;
477                 goto err2;
478         } else if (r < 0) {
479                 goto err2;
480         }
481
482         tmp = adev->wb.wb[index];
483         if (tmp == 0xDEADBEEF)
484                 r = 0;
485         else
486                 r = -EINVAL;
487
488 err2:
489         amdgpu_ib_free(adev, &ib, NULL);
490         dma_fence_put(f);
491 err1:
492         amdgpu_device_wb_free(adev, index);
493         return r;
494 }
495
496
497 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
498 {
499         release_firmware(adev->gfx.pfp_fw);
500         adev->gfx.pfp_fw = NULL;
501         release_firmware(adev->gfx.me_fw);
502         adev->gfx.me_fw = NULL;
503         release_firmware(adev->gfx.ce_fw);
504         adev->gfx.ce_fw = NULL;
505         release_firmware(adev->gfx.rlc_fw);
506         adev->gfx.rlc_fw = NULL;
507         release_firmware(adev->gfx.mec_fw);
508         adev->gfx.mec_fw = NULL;
509         release_firmware(adev->gfx.mec2_fw);
510         adev->gfx.mec2_fw = NULL;
511
512         kfree(adev->gfx.rlc.register_list_format);
513 }
514
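/* Parse the save/restore list sections (CNTL, GPM, SRM) from a
 * v2.1 RLC firmware header.
 */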
515 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
516 {
517         const struct rlc_firmware_header_v2_1 *rlc_hdr;
518
519         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
520         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
521         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
522         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
523         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
524         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
525         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
526         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
527         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
528         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
529         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
530         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
531         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
532         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
533                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
534 }
535
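/* Check whether the CP (ME/PFP/MEC) firmware is new enough to support
 * the combined register write-and-wait operation.
 */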
536 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
537 {
538         adev->gfx.me_fw_write_wait = false;
539         adev->gfx.mec_fw_write_wait = false;
540
541         switch (adev->asic_type) {
542         case CHIP_VEGA10:
543                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
544                     (adev->gfx.me_feature_version >= 42) &&
545                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
546                     (adev->gfx.pfp_feature_version >= 42))
547                         adev->gfx.me_fw_write_wait = true;
548
549                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
550                     (adev->gfx.mec_feature_version >= 42))
551                         adev->gfx.mec_fw_write_wait = true;
552                 break;
553         case CHIP_VEGA12:
554                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
555                     (adev->gfx.me_feature_version >= 44) &&
556                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
557                     (adev->gfx.pfp_feature_version >= 44))
558                         adev->gfx.me_fw_write_wait = true;
559
560                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
561                     (adev->gfx.mec_feature_version >= 44))
562                         adev->gfx.mec_fw_write_wait = true;
563                 break;
564         case CHIP_VEGA20:
565                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
566                     (adev->gfx.me_feature_version >= 44) &&
567                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
568                     (adev->gfx.pfp_feature_version >= 44))
569                         adev->gfx.me_fw_write_wait = true;
570
571                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
572                     (adev->gfx.mec_feature_version >= 44))
573                         adev->gfx.mec_fw_write_wait = true;
574                 break;
575         case CHIP_RAVEN:
576                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
577                     (adev->gfx.me_feature_version >= 42) &&
578                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
579                     (adev->gfx.pfp_feature_version >= 42))
580                         adev->gfx.me_fw_write_wait = true;
581
582                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
583                     (adev->gfx.mec_feature_version >= 42))
584                         adev->gfx.mec_fw_write_wait = true;
585                 break;
586         default:
587                 break;
588         }
589 }
590
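/* Disable GFXOFF on original Raven when the RLC firmware is too old to
 * support it; when GFXOFF stays enabled, set the matching GFX
 * powergating flags.
 */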
591 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
592 {
593         switch (adev->asic_type) {
594         case CHIP_VEGA10:
595         case CHIP_VEGA12:
596         case CHIP_VEGA20:
597                 break;
598         case CHIP_RAVEN:
599                 if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
 600                         && ((adev->gfx.rlc_fw_version != 106 &&
601                              adev->gfx.rlc_fw_version < 531) ||
602                             (adev->gfx.rlc_fw_version == 53815) ||
603                             (adev->gfx.rlc_feature_version < 1) ||
604                             !adev->gfx.rlc.is_rlc_v2_1))
605                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
606
607                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
608                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
609                                 AMD_PG_SUPPORT_CP |
610                                 AMD_PG_SUPPORT_RLC_SMU_HS;
611                 break;
612         default:
613                 break;
614         }
615 }
616
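/* Request and validate the PFP, ME, CE, RLC, MEC and (optional) MEC2
 * firmware images for this chip, parse their headers, and register the
 * ucode entries when the firmware is loaded through the PSP.
 */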
617 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
618 {
619         const char *chip_name;
620         char fw_name[30];
621         int err;
622         struct amdgpu_firmware_info *info = NULL;
623         const struct common_firmware_header *header = NULL;
624         const struct gfx_firmware_header_v1_0 *cp_hdr;
625         const struct rlc_firmware_header_v2_0 *rlc_hdr;
626         unsigned int *tmp = NULL;
627         unsigned int i = 0;
628         uint16_t version_major;
629         uint16_t version_minor;
630         uint32_t smu_version;
631
632         DRM_DEBUG("\n");
633
634         switch (adev->asic_type) {
635         case CHIP_VEGA10:
636                 chip_name = "vega10";
637                 break;
638         case CHIP_VEGA12:
639                 chip_name = "vega12";
640                 break;
641         case CHIP_VEGA20:
642                 chip_name = "vega20";
643                 break;
644         case CHIP_RAVEN:
645                 if (adev->rev_id >= 8)
646                         chip_name = "raven2";
647                 else if (adev->pdev->device == 0x15d8)
648                         chip_name = "picasso";
649                 else
650                         chip_name = "raven";
651                 break;
652         default:
653                 BUG();
654         }
655
656         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
657         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
658         if (err)
659                 goto out;
660         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
661         if (err)
662                 goto out;
663         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
664         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
665         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
666
667         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
668         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
669         if (err)
670                 goto out;
671         err = amdgpu_ucode_validate(adev->gfx.me_fw);
672         if (err)
673                 goto out;
674         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
675         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
676         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
677
678         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
679         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
680         if (err)
681                 goto out;
682         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
683         if (err)
684                 goto out;
685         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
686         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
687         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
688
 689         /*
 690          * For Picasso on an AM4 socket board, we use picasso_rlc_am4.bin
 691          * instead of picasso_rlc.bin.
 692          * Detection method:
 693          * PCO AM4: revision >= 0xC8 && revision <= 0xCF,
 694          *          or revision >= 0xD8 && revision <= 0xDF;
 695          * otherwise it is PCO FP5.
 696          */
697         if (!strcmp(chip_name, "picasso") &&
698                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
699                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
700                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
701         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
702                 (smu_version >= 0x41e2b))
 703                 /*
 704                  * The SMC is loaded by the SBIOS on APUs, so the SMU version can be queried directly.
 705                  */
706                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
707         else
708                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
709         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
710         if (err)
711                 goto out;
 712         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
             if (err)
                     goto out;
 713         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
714
715         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
716         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
717         if (version_major == 2 && version_minor == 1)
718                 adev->gfx.rlc.is_rlc_v2_1 = true;
719
720         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
721         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
722         adev->gfx.rlc.save_and_restore_offset =
723                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
724         adev->gfx.rlc.clear_state_descriptor_offset =
725                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
726         adev->gfx.rlc.avail_scratch_ram_locations =
727                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
728         adev->gfx.rlc.reg_restore_list_size =
729                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
730         adev->gfx.rlc.reg_list_format_start =
731                         le32_to_cpu(rlc_hdr->reg_list_format_start);
732         adev->gfx.rlc.reg_list_format_separate_start =
733                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
734         adev->gfx.rlc.starting_offsets_start =
735                         le32_to_cpu(rlc_hdr->starting_offsets_start);
736         adev->gfx.rlc.reg_list_format_size_bytes =
737                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
738         adev->gfx.rlc.reg_list_size_bytes =
739                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
740         adev->gfx.rlc.register_list_format =
741                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
742                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
743         if (!adev->gfx.rlc.register_list_format) {
744                 err = -ENOMEM;
745                 goto out;
746         }
747
748         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
749                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
750         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
751                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
752
753         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
754
755         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
756                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
757         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
758                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
759
760         if (adev->gfx.rlc.is_rlc_v2_1)
761                 gfx_v9_0_init_rlc_ext_microcode(adev);
762
763         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
764         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
765         if (err)
766                 goto out;
767         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
768         if (err)
769                 goto out;
770         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
771         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
772         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
773
774
775         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
776         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
777         if (!err) {
778                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
779                 if (err)
780                         goto out;
 781                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
 782                                 adev->gfx.mec2_fw->data;
 783                 adev->gfx.mec2_fw_version =
 784                                 le32_to_cpu(cp_hdr->header.ucode_version);
 785                 adev->gfx.mec2_feature_version =
 786                                 le32_to_cpu(cp_hdr->ucode_feature_version);
787         } else {
788                 err = 0;
789                 adev->gfx.mec2_fw = NULL;
790         }
791
792         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
793                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
794                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
795                 info->fw = adev->gfx.pfp_fw;
796                 header = (const struct common_firmware_header *)info->fw->data;
797                 adev->firmware.fw_size +=
798                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
799
800                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
801                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
802                 info->fw = adev->gfx.me_fw;
803                 header = (const struct common_firmware_header *)info->fw->data;
804                 adev->firmware.fw_size +=
805                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
806
807                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
808                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
809                 info->fw = adev->gfx.ce_fw;
810                 header = (const struct common_firmware_header *)info->fw->data;
811                 adev->firmware.fw_size +=
812                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
813
814                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
815                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
816                 info->fw = adev->gfx.rlc_fw;
817                 header = (const struct common_firmware_header *)info->fw->data;
818                 adev->firmware.fw_size +=
819                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
820
821                 if (adev->gfx.rlc.is_rlc_v2_1 &&
822                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
823                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
824                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
825                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
826                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
827                         info->fw = adev->gfx.rlc_fw;
828                         adev->firmware.fw_size +=
829                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
830
831                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
832                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
833                         info->fw = adev->gfx.rlc_fw;
834                         adev->firmware.fw_size +=
835                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
836
837                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
838                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
839                         info->fw = adev->gfx.rlc_fw;
840                         adev->firmware.fw_size +=
841                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
842                 }
843
844                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
845                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
846                 info->fw = adev->gfx.mec_fw;
847                 header = (const struct common_firmware_header *)info->fw->data;
848                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
849                 adev->firmware.fw_size +=
850                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
851
852                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
853                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
854                 info->fw = adev->gfx.mec_fw;
855                 adev->firmware.fw_size +=
856                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
857
858                 if (adev->gfx.mec2_fw) {
859                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
860                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
861                         info->fw = adev->gfx.mec2_fw;
862                         header = (const struct common_firmware_header *)info->fw->data;
863                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
864                         adev->firmware.fw_size +=
865                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
866                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
867                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
868                         info->fw = adev->gfx.mec2_fw;
869                         adev->firmware.fw_size +=
870                                 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
871                 }
872
873         }
874
875 out:
876         gfx_v9_0_check_if_need_gfxoff(adev);
877         gfx_v9_0_check_fw_write_wait(adev);
878         if (err) {
879                 dev_err(adev->dev,
880                         "gfx9: Failed to load firmware \"%s\"\n",
881                         fw_name);
882                 release_firmware(adev->gfx.pfp_fw);
883                 adev->gfx.pfp_fw = NULL;
884                 release_firmware(adev->gfx.me_fw);
885                 adev->gfx.me_fw = NULL;
886                 release_firmware(adev->gfx.ce_fw);
887                 adev->gfx.ce_fw = NULL;
888                 release_firmware(adev->gfx.rlc_fw);
889                 adev->gfx.rlc_fw = NULL;
890                 release_firmware(adev->gfx.mec_fw);
891                 adev->gfx.mec_fw = NULL;
892                 release_firmware(adev->gfx.mec2_fw);
893                 adev->gfx.mec2_fw = NULL;
894         }
895         return err;
896 }
897
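/* Return the size of the clear-state buffer, in dwords. */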
898 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
899 {
900         u32 count = 0;
901         const struct cs_section_def *sect = NULL;
902         const struct cs_extent_def *ext = NULL;
903
904         /* begin clear state */
905         count += 2;
906         /* context control state */
907         count += 3;
908
909         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
910                 for (ext = sect->section; ext->extent != NULL; ++ext) {
911                         if (sect->id == SECT_CONTEXT)
912                                 count += 2 + ext->reg_count;
913                         else
914                                 return 0;
915                 }
916         }
917
918         /* end clear state */
919         count += 2;
920         /* clear state */
921         count += 2;
922
923         return count;
924 }
925
926 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
927                                     volatile u32 *buffer)
928 {
929         u32 count = 0, i;
930         const struct cs_section_def *sect = NULL;
931         const struct cs_extent_def *ext = NULL;
932
933         if (adev->gfx.rlc.cs_data == NULL)
934                 return;
935         if (buffer == NULL)
936                 return;
937
938         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
939         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
940
941         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
942         buffer[count++] = cpu_to_le32(0x80000000);
943         buffer[count++] = cpu_to_le32(0x80000000);
944
945         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
946                 for (ext = sect->section; ext->extent != NULL; ++ext) {
947                         if (sect->id == SECT_CONTEXT) {
948                                 buffer[count++] =
949                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
950                                 buffer[count++] = cpu_to_le32(ext->reg_index -
951                                                 PACKET3_SET_CONTEXT_REG_START);
952                                 for (i = 0; i < ext->reg_count; i++)
953                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
954                         } else {
955                                 return;
956                         }
957                 }
958         }
959
960         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
961         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
962
963         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
964         buffer[count++] = cpu_to_le32(0);
965 }
966
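/* Compute the per-SE/SH always-on CU bitmaps and program them into the
 * RLC powergating and load-balancing CU mask registers.
 */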
967 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
968 {
969         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
970         uint32_t pg_always_on_cu_num = 2;
971         uint32_t always_on_cu_num;
972         uint32_t i, j, k;
973         uint32_t mask, cu_bitmap, counter;
974
975         if (adev->flags & AMD_IS_APU)
976                 always_on_cu_num = 4;
977         else if (adev->asic_type == CHIP_VEGA12)
978                 always_on_cu_num = 8;
979         else
980                 always_on_cu_num = 12;
981
982         mutex_lock(&adev->grbm_idx_mutex);
983         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
984                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
985                         mask = 1;
986                         cu_bitmap = 0;
987                         counter = 0;
988                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
989
 990                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
991                                 if (cu_info->bitmap[i][j] & mask) {
992                                         if (counter == pg_always_on_cu_num)
993                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
994                                         if (counter < always_on_cu_num)
995                                                 cu_bitmap |= mask;
996                                         else
997                                                 break;
998                                         counter++;
999                                 }
1000                                 mask <<= 1;
1001                         }
1002
1003                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1004                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1005                 }
1006         }
1007         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1008         mutex_unlock(&adev->grbm_idx_mutex);
1009 }
1010
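/* Program the RLC load-balancing (LBPW) thresholds and parameters used
 * on Raven.
 */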
1011 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1012 {
1013         uint32_t data;
1014
1015         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1016         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1017         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1018         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1019         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1020
1021         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1022         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1023
1024         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1025         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1026
1027         mutex_lock(&adev->grbm_idx_mutex);
 1028         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1029         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1030         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1031
1032         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1033         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1034         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1035         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1036         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1037
1038         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1039         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1040         data &= 0x0000FFFF;
1041         data |= 0x00C00000;
1042         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1043
1044         /*
1045          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1046          * programmed in gfx_v9_0_init_always_on_cu_mask()
1047          */
1048
 1049         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
 1050          * but is used here as part of the RLC_LB_CNTL configuration */
1051         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1052         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1053         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1054         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1055         mutex_unlock(&adev->grbm_idx_mutex);
1056
1057         gfx_v9_0_init_always_on_cu_mask(adev);
1058 }
1059
1060 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1061 {
1062         uint32_t data;
1063
1064         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1065         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1066         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1067         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1068         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1069
1070         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1071         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1072
1073         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1074         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1075
1076         mutex_lock(&adev->grbm_idx_mutex);
 1077         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1078         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1079         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1080
1081         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1082         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1083         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1084         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1085         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1086
1087         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1088         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1089         data &= 0x0000FFFF;
1090         data |= 0x00C00000;
1091         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1092
1093         /*
1094          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1095          * programmed in gfx_v9_0_init_always_on_cu_mask()
1096          */
1097
 1098         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
 1099          * but is used here as part of the RLC_LB_CNTL configuration */
1100         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1101         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1102         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1103         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1104         mutex_unlock(&adev->grbm_idx_mutex);
1105
1106         gfx_v9_0_init_always_on_cu_mask(adev);
1107 }
1108
1109 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1110 {
1111         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1112 }
1113
1114 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1115 {
1116         return 5;
1117 }
1118
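/*
 * RLC software init: points rlc.cs_data at the gfx9 clear state
 * definition, sets up the clear state block via amdgpu_gfx_rlc_init_csb(),
 * allocates the CP jump table + GDS backup area on Raven, and programs
 * the load-balancing thresholds for Raven/Vega20. Most likely reached
 * through the adev->gfx.rlc.funcs->init() callback invoked from
 * gfx_v9_0_sw_init() below.
 */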
1119 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1120 {
1121         const struct cs_section_def *cs_data;
1122         int r;
1123
1124         adev->gfx.rlc.cs_data = gfx9_cs_data;
1125
1126         cs_data = adev->gfx.rlc.cs_data;
1127
1128         if (cs_data) {
1129                 /* init clear state block */
1130                 r = amdgpu_gfx_rlc_init_csb(adev);
1131                 if (r)
1132                         return r;
1133         }
1134
1135         if (adev->asic_type == CHIP_RAVEN) {
1136                 /* TODO: double check the cp_table_size for RV */
1137                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1138                 r = amdgpu_gfx_rlc_init_cpt(adev);
1139                 if (r)
1140                         return r;
1141         }
1142
1143         switch (adev->asic_type) {
1144         case CHIP_RAVEN:
1145                 gfx_v9_0_init_lbpw(adev);
1146                 break;
1147         case CHIP_VEGA20:
1148                 gfx_v9_4_init_lbpw(adev);
1149                 break;
1150         default:
1151                 break;
1152         }
1153
1154         return 0;
1155 }
1156
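/*
 * Pin/unpin helpers for the RLC clear-state buffer. Pinning places the
 * BO in VRAM and caches its GPU address in rlc.clear_state_gpu_addr,
 * which gfx_v9_0_init_csb() later writes into the RLC_CSIB_ADDR_HI/LO
 * registers.
 */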
1157 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1158 {
1159         int r;
1160
1161         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1162         if (unlikely(r != 0))
1163                 return r;
1164
1165         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1166                         AMDGPU_GEM_DOMAIN_VRAM);
1167         if (!r)
1168                 adev->gfx.rlc.clear_state_gpu_addr =
1169                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1170
1171         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1172
1173         return r;
1174 }
1175
1176 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1177 {
1178         int r;
1179
1180         if (!adev->gfx.rlc.clear_state_obj)
1181                 return;
1182
1183         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1184         if (likely(r == 0)) {
1185                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1186                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1187         }
1188 }
1189
1190 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1191 {
1192         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1193         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1194 }
1195
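/*
 * MEC init: takes ownership of the compute queues, allocates a VRAM BO
 * with one GFX9_MEC_HPD_SIZE slot per enabled compute ring (hpd_eop_obj)
 * and clears it, then copies the MEC ucode from adev->gfx.mec_fw into a
 * GTT-backed BO (mec_fw_obj) for later firmware loading.
 */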
1196 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1197 {
1198         int r;
1199         u32 *hpd;
1200         const __le32 *fw_data;
1201         unsigned fw_size;
1202         u32 *fw;
1203         size_t mec_hpd_size;
1204
1205         const struct gfx_firmware_header_v1_0 *mec_hdr;
1206
1207         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1208
1209         /* take ownership of the relevant compute queues */
1210         amdgpu_gfx_compute_queue_acquire(adev);
1211         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1212
1213         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1214                                       AMDGPU_GEM_DOMAIN_VRAM,
1215                                       &adev->gfx.mec.hpd_eop_obj,
1216                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1217                                       (void **)&hpd);
1218         if (r) {
1219                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1220                 gfx_v9_0_mec_fini(adev);
1221                 return r;
1222         }
1223
1224         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1225
1226         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1227         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1228
1229         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1230
1231         fw_data = (const __le32 *)
1232                 (adev->gfx.mec_fw->data +
1233                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1234         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1235
1236         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1237                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1238                                       &adev->gfx.mec.mec_fw_obj,
1239                                       &adev->gfx.mec.mec_fw_gpu_addr,
1240                                       (void **)&fw);
1241         if (r) {
1242                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1243                 gfx_v9_0_mec_fini(adev);
1244                 return r;
1245         }
1246
1247         memcpy(fw, fw_data, fw_size);
1248
1249         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1250         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1251
1252         return 0;
1253 }
1254
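/*
 * Wave debug register access helpers. SQ_IND_INDEX selects a
 * (simd, wave, register) tuple and SQ_IND_DATA returns the value;
 * wave_read_regs() additionally sets AUTO_INCR so back-to-back reads
 * walk a register range. Rough usage sketch, mirroring the
 * read_wave_data/sgprs/vgprs callbacks below:
 *
 *	status = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
 *	wave_read_regs(adev, simd, wave, 0,
 *		       start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
 */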
1255 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1256 {
1257         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1258                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1259                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1260                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1261                 (SQ_IND_INDEX__FORCE_READ_MASK));
1262         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1263 }
1264
1265 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1266                            uint32_t wave, uint32_t thread,
1267                            uint32_t regno, uint32_t num, uint32_t *out)
1268 {
1269         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1270                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1271                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1272                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1273                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1274                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1275                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1276         while (num--)
1277                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1278 }
1279
1280 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1281 {
1282         /* type 1 wave data */
1283         dst[(*no_fields)++] = 1;
1284         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1285         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1286         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1287         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1288         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1289         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1290         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1291         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1292         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1293         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1294         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1295         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1296         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1297         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1298 }
1299
1300 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1301                                      uint32_t wave, uint32_t start,
1302                                      uint32_t size, uint32_t *dst)
1303 {
1304         wave_read_regs(
1305                 adev, simd, wave, 0,
1306                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1307 }
1308
1309 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1310                                      uint32_t wave, uint32_t thread,
1311                                      uint32_t start, uint32_t size,
1312                                      uint32_t *dst)
1313 {
1314         wave_read_regs(
1315                 adev, simd, wave, thread,
1316                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1317 }
1318
1319 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1320                                   u32 me, u32 pipe, u32 q, u32 vm)
1321 {
1322         soc15_grbm_select(adev, me, pipe, q, vm);
1323 }
1324
1325 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1326         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1327         .select_se_sh = &gfx_v9_0_select_se_sh,
1328         .read_wave_data = &gfx_v9_0_read_wave_data,
1329         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1330         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1331         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1332 };
1333
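/*
 * Early GPU configuration: selects per-ASIC context/FIFO sizes and a
 * "golden" GB_ADDR_CONFIG value (on Vega20 the register value is read
 * back and patched, with the vbios consulted via
 * amdgpu_atomfirmware_get_gfx_info()), then decodes GB_ADDR_CONFIG into
 * the gb_addr_config_fields helpers. Each decoded field is a power of
 * two, e.g.:
 *
 *	num_pipes = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PIPES);
 */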
1334 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1335 {
1336         u32 gb_addr_config;
1337         int err;
1338
1339         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1340
1341         switch (adev->asic_type) {
1342         case CHIP_VEGA10:
1343                 adev->gfx.config.max_hw_contexts = 8;
1344                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1345                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1346                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1347                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1348                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1349                 break;
1350         case CHIP_VEGA12:
1351                 adev->gfx.config.max_hw_contexts = 8;
1352                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1353                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1354                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1355                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1356                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1357                 DRM_INFO("fix gfx.config for vega12\n");
1358                 break;
1359         case CHIP_VEGA20:
1360                 adev->gfx.config.max_hw_contexts = 8;
1361                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1362                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1363                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1364                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1365                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1366                 gb_addr_config &= ~0xf3e777ff;
1367                 gb_addr_config |= 0x22014042;
1368                 /* check vbios table if gpu info is not available */
1369                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1370                 if (err)
1371                         return err;
1372                 break;
1373         case CHIP_RAVEN:
1374                 adev->gfx.config.max_hw_contexts = 8;
1375                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1376                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1377                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1378                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1379                 if (adev->rev_id >= 8)
1380                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1381                 else
1382                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1383                 break;
1384         default:
1385                 BUG();
1386                 break;
1387         }
1388
1389         adev->gfx.config.gb_addr_config = gb_addr_config;
1390
1391         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1392                         REG_GET_FIELD(
1393                                         adev->gfx.config.gb_addr_config,
1394                                         GB_ADDR_CONFIG,
1395                                         NUM_PIPES);
1396
1397         adev->gfx.config.max_tile_pipes =
1398                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1399
1400         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1401                         REG_GET_FIELD(
1402                                         adev->gfx.config.gb_addr_config,
1403                                         GB_ADDR_CONFIG,
1404                                         NUM_BANKS);
1405         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1406                         REG_GET_FIELD(
1407                                         adev->gfx.config.gb_addr_config,
1408                                         GB_ADDR_CONFIG,
1409                                         MAX_COMPRESSED_FRAGS);
1410         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1411                         REG_GET_FIELD(
1412                                         adev->gfx.config.gb_addr_config,
1413                                         GB_ADDR_CONFIG,
1414                                         NUM_RB_PER_SE);
1415         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1416                         REG_GET_FIELD(
1417                                         adev->gfx.config.gb_addr_config,
1418                                         GB_ADDR_CONFIG,
1419                                         NUM_SHADER_ENGINES);
1420         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1421                         REG_GET_FIELD(
1422                                         adev->gfx.config.gb_addr_config,
1423                                         GB_ADDR_CONFIG,
1424                                         PIPE_INTERLEAVE_SIZE));
1425
1426         return 0;
1427 }
1428
1429 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1430                                    struct amdgpu_ngg_buf *ngg_buf,
1431                                    int size_se,
1432                                    int default_size_se)
1433 {
1434         int r;
1435
1436         if (size_se < 0) {
1437                 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1438                 return -EINVAL;
1439         }
1440         size_se = size_se ? size_se : default_size_se;
1441
1442         ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1443         r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1444                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1445                                     &ngg_buf->bo,
1446                                     &ngg_buf->gpu_addr,
1447                                     NULL);
1448         if (r) {
1449                 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1450                 return r;
1451         }
1452         ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1453
1454         return r;
1455 }
1456
1457 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1458 {
1459         int i;
1460
1461         for (i = 0; i < NGG_BUF_MAX; i++)
1462                 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1463                                       &adev->gfx.ngg.buf[i].gpu_addr,
1464                                       NULL);
1465
1466         memset(&adev->gfx.ngg.buf[0], 0,
1467                         sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1468
1469         adev->gfx.ngg.init = false;
1470
1471         return 0;
1472 }
1473
1474 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1475 {
1476         int r;
1477
1478         if (!amdgpu_ngg || adev->gfx.ngg.init)
1479                 return 0;
1480
1481         /* GDS reserve memory: 64 bytes alignment */
1482         adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1483         adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1484         adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1485         adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1486
1487         /* Primitive Buffer */
1488         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1489                                     amdgpu_prim_buf_per_se,
1490                                     64 * 1024);
1491         if (r) {
1492                 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1493                 goto err;
1494         }
1495
1496         /* Position Buffer */
1497         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1498                                     amdgpu_pos_buf_per_se,
1499                                     256 * 1024);
1500         if (r) {
1501                 dev_err(adev->dev, "Failed to create Position Buffer\n");
1502                 goto err;
1503         }
1504
1505         /* Control Sideband */
1506         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1507                                     amdgpu_cntl_sb_buf_per_se,
1508                                     256);
1509         if (r) {
1510                 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1511                 goto err;
1512         }
1513
1514         /* Parameter Cache, not created by default */
1515         if (amdgpu_param_buf_per_se <= 0)
1516                 goto out;
1517
1518         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1519                                     amdgpu_param_buf_per_se,
1520                                     512 * 1024);
1521         if (r) {
1522                 dev_err(adev->dev, "Failed to create Parameter Cache\n");
1523                 goto err;
1524         }
1525
1526 out:
1527         adev->gfx.ngg.init = true;
1528         return 0;
1529 err:
1530         gfx_v9_0_ngg_fini(adev);
1531         return r;
1532 }
1533
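/*
 * NGG enable: programs the WD buffer sizes and base addresses from the
 * BOs created in gfx_v9_0_ngg_init(), then uses the gfx ring to clear
 * the GDS range reserved for NGG. GDS_VMID0_SIZE is first extended to
 * cover the reserved area, a DMA_DATA packet zero-fills that area, and
 * the size register is then written to 0.
 */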
1534 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1535 {
1536         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1537         int r;
1538         u32 data, base;
1539
1540         if (!amdgpu_ngg)
1541                 return 0;
1542
1543         /* Program buffer size */
1544         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1545                              adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1546         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1547                              adev->gfx.ngg.buf[NGG_POS].size >> 8);
1548         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1549
1550         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1551                              adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1552         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1553                              adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1554         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1555
1556         /* Program buffer base address */
1557         base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1558         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1559         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1560
1561         base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1562         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1563         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1564
1565         base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1566         data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1567         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1568
1569         base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1570         data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1571         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1572
1573         base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1574         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1575         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1576
1577         base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1578         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1579         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1580
1581         /* Clear GDS reserved memory */
1582         r = amdgpu_ring_alloc(ring, 17);
1583         if (r) {
1584                 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1585                           ring->name, r);
1586                 return r;
1587         }
1588
1589         gfx_v9_0_write_data_to_reg(ring, 0, false,
1590                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1591                                    (adev->gds.gds_size +
1592                                     adev->gfx.ngg.gds_reserve_size));
1593
1594         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1595         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1596                                 PACKET3_DMA_DATA_DST_SEL(1) |
1597                                 PACKET3_DMA_DATA_SRC_SEL(2)));
1598         amdgpu_ring_write(ring, 0);
1599         amdgpu_ring_write(ring, 0);
1600         amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1601         amdgpu_ring_write(ring, 0);
1602         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1603                                 adev->gfx.ngg.gds_reserve_size);
1604
1605         gfx_v9_0_write_data_to_reg(ring, 0, false,
1606                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1607
1608         amdgpu_ring_commit(ring);
1609
1610         return 0;
1611 }
1612
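/*
 * Per-ring compute setup: each compute ring is addressed by a
 * (mec, pipe, queue) triple (mec0 is exposed as ME1 to the CP), gets its
 * own doorbell index and a GFX9_MEC_HPD_SIZE slice of the HPD/EOP BO,
 * and has its EOP interrupt routed to the per-MEC, per-pipe IRQ source.
 */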
1613 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1614                                       int mec, int pipe, int queue)
1615 {
1616         int r;
1617         unsigned irq_type;
1618         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1619
1622         /* mec0 is me1 */
1623         ring->me = mec + 1;
1624         ring->pipe = pipe;
1625         ring->queue = queue;
1626
1627         ring->ring_obj = NULL;
1628         ring->use_doorbell = true;
1629         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1630         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1631                                 + (ring_id * GFX9_MEC_HPD_SIZE);
1632         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1633
1634         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1635                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1636                 + ring->pipe;
1637
1638         /* type-2 packets are deprecated on MEC, use type-3 instead */
1639         r = amdgpu_ring_init(adev, ring, 1024,
1640                              &adev->gfx.eop_irq, irq_type);
1641         if (r)
1642                 return r;
1643
1645         return 0;
1646 }
1647
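/*
 * IP block sw_init: registers the CP interrupt sources (EOP, privileged
 * register/instruction faults, ECC and FUE errors), loads the gfx
 * microcode, initializes the RLC, MEC and KIQ BOs, and creates the gfx
 * ring plus the compute rings (allocated horizontally across pipes)
 * together with their MQD backing store.
 */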
1648 static int gfx_v9_0_sw_init(void *handle)
1649 {
1650         int i, j, k, r, ring_id;
1651         struct amdgpu_ring *ring;
1652         struct amdgpu_kiq *kiq;
1653         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1654
1655         switch (adev->asic_type) {
1656         case CHIP_VEGA10:
1657         case CHIP_VEGA12:
1658         case CHIP_VEGA20:
1659         case CHIP_RAVEN:
1660                 adev->gfx.mec.num_mec = 2;
1661                 break;
1662         default:
1663                 adev->gfx.mec.num_mec = 1;
1664                 break;
1665         }
1666
1667         adev->gfx.mec.num_pipe_per_mec = 4;
1668         adev->gfx.mec.num_queue_per_pipe = 8;
1669
1670         /* EOP Event */
1671         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1672         if (r)
1673                 return r;
1674
1675         /* Privileged reg */
1676         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1677                               &adev->gfx.priv_reg_irq);
1678         if (r)
1679                 return r;
1680
1681         /* Privileged inst */
1682         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1683                               &adev->gfx.priv_inst_irq);
1684         if (r)
1685                 return r;
1686
1687         /* ECC error */
1688         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1689                               &adev->gfx.cp_ecc_error_irq);
1690         if (r)
1691                 return r;
1692
1693         /* FUE error */
1694         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1695                               &adev->gfx.cp_ecc_error_irq);
1696         if (r)
1697                 return r;
1698
1699         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1700
1701         gfx_v9_0_scratch_init(adev);
1702
1703         r = gfx_v9_0_init_microcode(adev);
1704         if (r) {
1705                 DRM_ERROR("Failed to load gfx firmware!\n");
1706                 return r;
1707         }
1708
1709         r = adev->gfx.rlc.funcs->init(adev);
1710         if (r) {
1711                 DRM_ERROR("Failed to init rlc BOs!\n");
1712                 return r;
1713         }
1714
1715         r = gfx_v9_0_mec_init(adev);
1716         if (r) {
1717                 DRM_ERROR("Failed to init MEC BOs!\n");
1718                 return r;
1719         }
1720
1721         /* set up the gfx ring */
1722         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1723                 ring = &adev->gfx.gfx_ring[i];
1724                 ring->ring_obj = NULL;
1725                 if (!i)
1726                         sprintf(ring->name, "gfx");
1727                 else
1728                         sprintf(ring->name, "gfx_%d", i);
1729                 ring->use_doorbell = true;
1730                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1731                 r = amdgpu_ring_init(adev, ring, 1024,
1732                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
1733                 if (r)
1734                         return r;
1735         }
1736
1737         /* set up the compute queues - allocate horizontally across pipes */
1738         ring_id = 0;
1739         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1740                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1741                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1742                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1743                                         continue;
1744
1745                                 r = gfx_v9_0_compute_ring_init(adev,
1746                                                                ring_id,
1747                                                                i, k, j);
1748                                 if (r)
1749                                         return r;
1750
1751                                 ring_id++;
1752                         }
1753                 }
1754         }
1755
1756         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1757         if (r) {
1758                 DRM_ERROR("Failed to init KIQ BOs!\n");
1759                 return r;
1760         }
1761
1762         kiq = &adev->gfx.kiq;
1763         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1764         if (r)
1765                 return r;
1766
1767         /* create MQD for all compute queues as well as KIQ for SRIOV case */
1768         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1769         if (r)
1770                 return r;
1771
1772         adev->gfx.ce_ram_size = 0x8000;
1773
1774         r = gfx_v9_0_gpu_early_init(adev);
1775         if (r)
1776                 return r;
1777
1778         r = gfx_v9_0_ngg_init(adev);
1779         if (r)
1780                 return r;
1781
1782         return 0;
1783 }
1784
1785
1786 static int gfx_v9_0_sw_fini(void *handle)
1787 {
1788         int i;
1789         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1790
1791         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1792                         adev->gfx.ras_if) {
1793                 struct ras_common_if *ras_if = adev->gfx.ras_if;
1794                 struct ras_ih_if ih_info = {
1795                         .head = *ras_if,
1796                 };
1797
1798                 amdgpu_ras_debugfs_remove(adev, ras_if);
1799                 amdgpu_ras_sysfs_remove(adev, ras_if);
1800                 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
1801                 amdgpu_ras_feature_enable(adev, ras_if, 0);
1802                 kfree(ras_if);
1803         }
1804
1805         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1806                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1807         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1808                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1809
1810         amdgpu_gfx_mqd_sw_fini(adev);
1811         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1812         amdgpu_gfx_kiq_fini(adev);
1813
1814         gfx_v9_0_mec_fini(adev);
1815         gfx_v9_0_ngg_fini(adev);
1816         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1817         if (adev->asic_type == CHIP_RAVEN) {
1818                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1819                                 &adev->gfx.rlc.cp_table_gpu_addr,
1820                                 (void **)&adev->gfx.rlc.cp_table_ptr);
1821         }
1822         gfx_v9_0_free_microcode(adev);
1823
1824         return 0;
1825 }
1826
1827
1828 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1829 {
1830         /* TODO */
1831 }
1832
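/*
 * GRBM_GFX_INDEX selection helper: 0xffffffff for se_num, sh_num or
 * instance enables broadcast for that dimension, anything else selects
 * the given index. Callers hold adev->grbm_idx_mutex around the
 * selection, e.g. (sketch based on gfx_v9_0_setup_rb() below):
 *
 *	mutex_lock(&adev->grbm_idx_mutex);
 *	gfx_v9_0_select_se_sh(adev, se, sh, 0xffffffff);
 *	... access per-SE/SH registers ...
 *	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 *	mutex_unlock(&adev->grbm_idx_mutex);
 */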
1833 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1834 {
1835         u32 data;
1836
1837         if (instance == 0xffffffff)
1838                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1839         else
1840                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1841
1842         if (se_num == 0xffffffff)
1843                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1844         else
1845                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1846
1847         if (sh_num == 0xffffffff)
1848                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1849         else
1850                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1851
1852         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1853 }
1854
1855 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1856 {
1857         u32 data, mask;
1858
1859         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1860         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1861
1862         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1863         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1864
1865         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1866                                          adev->gfx.config.max_sh_per_se);
1867
1868         return (~data) & mask;
1869 }
1870
1871 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1872 {
1873         int i, j;
1874         u32 data;
1875         u32 active_rbs = 0;
1876         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1877                                         adev->gfx.config.max_sh_per_se;
1878
1879         mutex_lock(&adev->grbm_idx_mutex);
1880         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1881                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1882                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1883                         data = gfx_v9_0_get_rb_active_bitmap(adev);
1884                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1885                                                rb_bitmap_width_per_sh);
1886                 }
1887         }
1888         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1889         mutex_unlock(&adev->grbm_idx_mutex);
1890
1891         adev->gfx.config.backend_enable_mask = active_rbs;
1892         adev->gfx.config.num_rbs = hweight32(active_rbs);
1893 }
1894
1895 #define DEFAULT_SH_MEM_BASES    (0x6000)
1896 #define FIRST_COMPUTE_VMID      (8)
1897 #define LAST_COMPUTE_VMID       (16)
1898 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1899 {
1900         int i;
1901         uint32_t sh_mem_config;
1902         uint32_t sh_mem_bases;
1903
1904         /*
1905          * Configure apertures:
1906          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1907          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1908          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1909          */
1910         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1911
1912         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1913                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1914                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1915
1916         mutex_lock(&adev->srbm_mutex);
1917         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1918                 soc15_grbm_select(adev, 0, 0, 0, i);
1919                 /* CP and shaders */
1920                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1921                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1922         }
1923         soc15_grbm_select(adev, 0, 0, 0, 0);
1924         mutex_unlock(&adev->srbm_mutex);
1925
1926         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
1927            access. These should be enabled by FW for target VMIDs. */
1928         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1929                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
1930                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
1931                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
1932                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
1933         }
1934 }
1935
1936 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1937 {
1938         u32 tmp;
1939         int i;
1940
1941         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1942
1943         gfx_v9_0_tiling_mode_table_init(adev);
1944
1945         gfx_v9_0_setup_rb(adev);
1946         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1947         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1948
1949         /* XXX SH_MEM regs */
1950         /* where to put LDS, scratch, GPUVM in FSA64 space */
1951         mutex_lock(&adev->srbm_mutex);
1952         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1953                 soc15_grbm_select(adev, 0, 0, 0, i);
1954                 /* CP and shaders */
1955                 if (i == 0) {
1956                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1957                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1958                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
1959                                             !!amdgpu_noretry);
1960                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1961                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
1962                 } else {
1963                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1964                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1965                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
1966                                             !!amdgpu_noretry);
1967                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1968                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1969                                 (adev->gmc.private_aperture_start >> 48));
1970                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1971                                 (adev->gmc.shared_aperture_start >> 48));
1972                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
1973                 }
1974         }
1975         soc15_grbm_select(adev, 0, 0, 0, 0);
1976
1977         mutex_unlock(&adev->srbm_mutex);
1978
1979         gfx_v9_0_init_compute_vmid(adev);
1980 }
1981
1982 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1983 {
1984         u32 i, j, k;
1985         u32 mask;
1986
1987         mutex_lock(&adev->grbm_idx_mutex);
1988         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1989                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1990                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1991                         for (k = 0; k < adev->usec_timeout; k++) {
1992                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1993                                         break;
1994                                 udelay(1);
1995                         }
1996                         if (k == adev->usec_timeout) {
1997                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
1998                                                       0xffffffff, 0xffffffff);
1999                                 mutex_unlock(&adev->grbm_idx_mutex);
2000                                 DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
2001                                          i, j);
2002                                 return;
2003                         }
2004                 }
2005         }
2006         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2007         mutex_unlock(&adev->grbm_idx_mutex);
2008
2009         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2010                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2011                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2012                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2013         for (k = 0; k < adev->usec_timeout; k++) {
2014                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2015                         break;
2016                 udelay(1);
2017         }
2018 }
2019
2020 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2021                                                bool enable)
2022 {
2023         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2024
2025         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2026         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2027         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2028         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2029
2030         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2031 }
2032
2033 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2034 {
2035         /* csib */
2036         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2037                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2038         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2039                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2040         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2041                         adev->gfx.rlc.clear_state_size);
2042 }
2043
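/*
 * Helper for building the RLC save/restore list. The indirect part of
 * register_list_format is a sequence of blocks terminated by 0xFFFFFFFF;
 * within a block each entry spans three dwords, the third being the
 * indirect register offset. The parser records where every block starts
 * (indirect_start_offsets) and collects the distinct indirect registers
 * (unique_indirect_regs), which gfx_v9_1_init_rlc_save_restore_list()
 * then programs into the RLC_SRM_INDEX_CNTL_ADDR/DATA register pairs.
 */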
2044 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2045                                 int indirect_offset,
2046                                 int list_size,
2047                                 int *unique_indirect_regs,
2048                                 int unique_indirect_reg_count,
2049                                 int *indirect_start_offsets,
2050                                 int *indirect_start_offsets_count,
2051                                 int max_start_offsets_count)
2052 {
2053         int idx;
2054
2055         for (; indirect_offset < list_size; indirect_offset++) {
2056                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2057                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2058                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2059
2060                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2061                         indirect_offset += 2;
2062
2063                         /* look for the matching index */
2064                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2065                                 if (unique_indirect_regs[idx] ==
2066                                         register_list_format[indirect_offset] ||
2067                                         !unique_indirect_regs[idx])
2068                                         break;
2069                         }
2070
2071                         BUG_ON(idx >= unique_indirect_reg_count);
2072
2073                         if (!unique_indirect_regs[idx])
2074                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2075
2076                         indirect_offset++;
2077                 }
2078         }
2079 }
2080
2081 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2082 {
2083         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2084         int unique_indirect_reg_count = 0;
2085
2086         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2087         int indirect_start_offsets_count = 0;
2088
2089         int list_size = 0;
2090         int i = 0, j = 0;
2091         u32 tmp = 0;
2092
2093         u32 *register_list_format =
2094                 kmemdup(adev->gfx.rlc.register_list_format,
2095                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2096         if (!register_list_format)
2097                 return -ENOMEM;
2098
2099         /* setup unique_indirect_regs array and indirect_start_offsets array */
2100         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2101         gfx_v9_1_parse_ind_reg_list(register_list_format,
2102                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2103                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2104                                     unique_indirect_regs,
2105                                     unique_indirect_reg_count,
2106                                     indirect_start_offsets,
2107                                     &indirect_start_offsets_count,
2108                                     ARRAY_SIZE(indirect_start_offsets));
2109
2110         /* enable auto inc in case it is disabled */
2111         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2112         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2113         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2114
2115         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2116         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2117                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2118         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2119                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2120                         adev->gfx.rlc.register_restore[i]);
2121
2122         /* load indirect register */
2123         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2124                 adev->gfx.rlc.reg_list_format_start);
2125
2126         /* direct register portion */
2127         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2128                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2129                         register_list_format[i]);
2130
2131         /* indirect register portion */
2132         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2133                 if (register_list_format[i] == 0xFFFFFFFF) {
2134                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2135                         continue;
2136                 }
2137
2138                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2139                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2140
2141                 for (j = 0; j < unique_indirect_reg_count; j++) {
2142                         if (register_list_format[i] == unique_indirect_regs[j]) {
2143                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2144                                 break;
2145                         }
2146                 }
2147
2148                 BUG_ON(j >= unique_indirect_reg_count);
2149
2150                 i++;
2151         }
2152
2153         /* set save/restore list size */
2154         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2155         list_size = list_size >> 1;
2156         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2157                 adev->gfx.rlc.reg_restore_list_size);
2158         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2159
2160         /* write the starting offsets to RLC scratch ram */
2161         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2162                 adev->gfx.rlc.starting_offsets_start);
2163         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2164                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2165                        indirect_start_offsets[i]);
2166
2167         /* load unique indirect regs*/
2168         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2169                 if (unique_indirect_regs[i] != 0) {
2170                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2171                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2172                                unique_indirect_regs[i] & 0x3FFFF);
2173
2174                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2175                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2176                                unique_indirect_regs[i] >> 20);
2177                 }
2178         }
2179
2180         kfree(register_list_format);
2181         return 0;
2182 }
2183
2184 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2185 {
2186         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2187 }
2188
2189 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2190                                              bool enable)
2191 {
2192         uint32_t data = 0;
2193         uint32_t default_data = 0;
2194
2195         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2196         if (enable) {
2197                 /* enable GFXIP control over CGPG */
2198                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2199                 if (default_data != data)
2200                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2201
2202                 /* update status */
2203                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2204                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2205                 if (default_data != data)
2206                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2207         } else {
2208                 /* restore GFXIP control over CGPG */
2209                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2210                 if (default_data != data)
2211                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2212         }
2213 }
2214
2215 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2216 {
2217         uint32_t data = 0;
2218
2219         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2220                               AMD_PG_SUPPORT_GFX_SMG |
2221                               AMD_PG_SUPPORT_GFX_DMG)) {
2222                 /* init IDLE_POLL_COUNT = 60 */
2223                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2224                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2225                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2226                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2227
2228                 /* init RLC PG Delay */
2229                 data = 0;
2230                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2231                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2232                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2233                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2234                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2235
2236                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2237                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2238                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2239                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2240
2241                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2242                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2243                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2244                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2245
2246                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2247                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2248
2249                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2250                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2251                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2252
2253                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2254         }
2255 }
2256
2257 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2258                                                 bool enable)
2259 {
2260         uint32_t data = 0;
2261         uint32_t default_data = 0;
2262
2263         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2264         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2265                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2266                              enable ? 1 : 0);
2267         if (default_data != data)
2268                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2269 }
2270
2271 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2272                                                 bool enable)
2273 {
2274         uint32_t data = 0;
2275         uint32_t default_data = 0;
2276
2277         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2278         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2279                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2280                              enable ? 1 : 0);
2281         if (default_data != data)
2282                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2283 }
2284
2285 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2286                                         bool enable)
2287 {
2288         uint32_t data = 0;
2289         uint32_t default_data = 0;
2290
2291         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2292         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2293                              CP_PG_DISABLE,
2294                              enable ? 0 : 1);
2295         if (default_data != data)
2296                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2297 }
2298
2299 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2300                                                 bool enable)
2301 {
2302         uint32_t data, default_data;
2303
2304         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2305         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2306                              GFX_POWER_GATING_ENABLE,
2307                              enable ? 1 : 0);
2308         if (default_data != data)
2309                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2310 }
2311
2312 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2313                                                 bool enable)
2314 {
2315         uint32_t data, default_data;
2316
2317         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2318         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2319                              GFX_PIPELINE_PG_ENABLE,
2320                              enable ? 1 : 0);
2321         if (default_data != data)
2322                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2323
2324         if (!enable)
2325                 /* read any GFX register to wake up GFX */
2326                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2327 }
2328
2329 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2330                                                        bool enable)
2331 {
2332         uint32_t data, default_data;
2333
2334         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2335         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2336                              STATIC_PER_CU_PG_ENABLE,
2337                              enable ? 1 : 0);
2338         if (default_data != data)
2339                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2340 }
2341
2342 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2343                                                 bool enable)
2344 {
2345         uint32_t data, default_data;
2346
2347         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2348         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2349                              DYN_PER_CU_PG_ENABLE,
2350                              enable ? 1 : 0);
2351         if (default_data != data)
2352                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2353 }
2354
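/*
 * Power gating init: programs the clear-state buffer registers, sets up
 * and enables the RLC save/restore machine when RLC v2.1 firmware is
 * present (needed for gfxoff), and, if any GFX PG feature is enabled,
 * points RLC_JUMP_TABLE_RESTORE at the CP jump table and programs the
 * PG delay/idle-threshold registers in gfx_v9_0_init_gfx_power_gating().
 */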
2355 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2356 {
2357         gfx_v9_0_init_csb(adev);
2358
2359         /*
2360          * The RLC save/restore list is only available since RLC v2.1,
2361          * and it is required by the gfxoff feature.
2362          */
2363         if (adev->gfx.rlc.is_rlc_v2_1) {
2364                 gfx_v9_1_init_rlc_save_restore_list(adev);
2365                 gfx_v9_0_enable_save_restore_machine(adev);
2366         }
2367
2368         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2369                               AMD_PG_SUPPORT_GFX_SMG |
2370                               AMD_PG_SUPPORT_GFX_DMG |
2371                               AMD_PG_SUPPORT_CP |
2372                               AMD_PG_SUPPORT_GDS |
2373                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2374                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2375                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2376                 gfx_v9_0_init_gfx_power_gating(adev);
2377         }
2378 }
2379
2380 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2381 {
2382         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2383         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2384         gfx_v9_0_wait_for_rlc_serdes(adev);
2385 }
2386
2387 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2388 {
2389         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2390         udelay(50);
2391         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2392         udelay(50);
2393 }
2394
2395 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2396 {
2397 #ifdef AMDGPU_RLC_DEBUG_RETRY
2398         u32 rlc_ucode_ver;
2399 #endif
2400
2401         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2402         udelay(50);
2403
2404         /* APUs (e.g. Carrizo) enable the CP interrupt only after the CP has been initialized */
2405         if (!(adev->flags & AMD_IS_APU)) {
2406                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2407                 udelay(50);
2408         }
2409
2410 #ifdef AMDGPU_RLC_DEBUG_RETRY
2411         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2412         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2413         if (rlc_ucode_ver == 0x108) {
2414                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2415                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2416                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2417                  * default is 0x9C4 to create a 100us interval */
2418                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2419                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2420                  * to disable the page fault retry interrupts, default is
2421                  * 0x100 (256) */
2422                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2423         }
2424 #endif
2425 }
2426
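/*
 * Legacy (non-PSP) RLC ucode load: stream the image into
 * RLC_GPM_UCODE_DATA one dword at a time.
 */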
2427 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2428 {
2429         const struct rlc_firmware_header_v2_0 *hdr;
2430         const __le32 *fw_data;
2431         unsigned i, fw_size;
2432
2433         if (!adev->gfx.rlc_fw)
2434                 return -EINVAL;
2435
2436         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2437         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2438
2439         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2440                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2441         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2442
2443         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2444                         RLCG_UCODE_LOADING_START_ADDRESS);
2445         for (i = 0; i < fw_size; i++)
2446                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2447         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2448
2449         return 0;
2450 }
2451
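/*
 * Bring the RLC back up: stop it, disable CGCG/CGLS, redo the PG init,
 * load the RLC ucode when the PSP is not handling firmware loading,
 * apply the per-ASIC LBPW setting and restart the RLC.
 */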
2452 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2453 {
2454         int r;
2455
2456         if (amdgpu_sriov_vf(adev)) {
2457                 gfx_v9_0_init_csb(adev);
2458                 return 0;
2459         }
2460
2461         adev->gfx.rlc.funcs->stop(adev);
2462
2463         /* disable CG */
2464         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2465
2466         gfx_v9_0_init_pg(adev);
2467
2468         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2469                 /* legacy rlc firmware loading */
2470                 r = gfx_v9_0_rlc_load_microcode(adev);
2471                 if (r)
2472                         return r;
2473         }
2474
2475         switch (adev->asic_type) {
2476         case CHIP_RAVEN:
2477                 if (amdgpu_lbpw == 0)
2478                         gfx_v9_0_enable_lbpw(adev, false);
2479                 else
2480                         gfx_v9_0_enable_lbpw(adev, true);
2481                 break;
2482         case CHIP_VEGA20:
2483                 if (amdgpu_lbpw > 0)
2484                         gfx_v9_0_enable_lbpw(adev, true);
2485                 else
2486                         gfx_v9_0_enable_lbpw(adev, false);
2487                 break;
2488         default:
2489                 break;
2490         }
2491
2492         adev->gfx.rlc.funcs->start(adev);
2493
2494         return 0;
2495 }
2496
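/* Halt or release the gfx CP micro engines (ME/PFP/CE). */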
2497 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2498 {
2499         int i;
2500         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2501
2502         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2503         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2504         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2505         if (!enable) {
2506                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2507                         adev->gfx.gfx_ring[i].sched.ready = false;
2508         }
2509         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2510         udelay(50);
2511 }
2512
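/*
 * Legacy (non-PSP) CP gfx ucode load: halt the gfx CP and stream the
 * PFP, CE and ME images into their ucode data registers.
 */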
2513 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2514 {
2515         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2516         const struct gfx_firmware_header_v1_0 *ce_hdr;
2517         const struct gfx_firmware_header_v1_0 *me_hdr;
2518         const __le32 *fw_data;
2519         unsigned i, fw_size;
2520
2521         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2522                 return -EINVAL;
2523
2524         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2525                 adev->gfx.pfp_fw->data;
2526         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2527                 adev->gfx.ce_fw->data;
2528         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2529                 adev->gfx.me_fw->data;
2530
2531         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2532         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2533         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2534
2535         gfx_v9_0_cp_gfx_enable(adev, false);
2536
2537         /* PFP */
2538         fw_data = (const __le32 *)
2539                 (adev->gfx.pfp_fw->data +
2540                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2541         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2542         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2543         for (i = 0; i < fw_size; i++)
2544                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2545         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2546
2547         /* CE */
2548         fw_data = (const __le32 *)
2549                 (adev->gfx.ce_fw->data +
2550                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2551         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2552         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2553         for (i = 0; i < fw_size; i++)
2554                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2555         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2556
2557         /* ME */
2558         fw_data = (const __le32 *)
2559                 (adev->gfx.me_fw->data +
2560                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2561         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2562         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2563         for (i = 0; i < fw_size; i++)
2564                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2565         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2566
2567         return 0;
2568 }
2569
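/*
 * Prime the gfx ring: program CP_MAX_CONTEXT/CP_DEVICE_ID, enable the
 * gfx CP and emit the clear-state (CSB) preamble packets on ring 0.
 */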
2570 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2571 {
2572         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2573         const struct cs_section_def *sect = NULL;
2574         const struct cs_extent_def *ext = NULL;
2575         int r, i, tmp;
2576
2577         /* init the CP */
2578         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2579         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2580
2581         gfx_v9_0_cp_gfx_enable(adev, true);
2582
2583         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2584         if (r) {
2585                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2586                 return r;
2587         }
2588
2589         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2590         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2591
2592         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2593         amdgpu_ring_write(ring, 0x80000000);
2594         amdgpu_ring_write(ring, 0x80000000);
2595
2596         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2597                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2598                         if (sect->id == SECT_CONTEXT) {
2599                                 amdgpu_ring_write(ring,
2600                                        PACKET3(PACKET3_SET_CONTEXT_REG,
2601                                                ext->reg_count));
2602                                 amdgpu_ring_write(ring,
2603                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2604                                 for (i = 0; i < ext->reg_count; i++)
2605                                         amdgpu_ring_write(ring, ext->extent[i]);
2606                         }
2607                 }
2608         }
2609
2610         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2611         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2612
2613         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2614         amdgpu_ring_write(ring, 0);
2615
2616         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2617         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2618         amdgpu_ring_write(ring, 0x8000);
2619         amdgpu_ring_write(ring, 0x8000);
2620
2621         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2622         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2623                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2624         amdgpu_ring_write(ring, tmp);
2625         amdgpu_ring_write(ring, 0);
2626
2627         amdgpu_ring_commit(ring);
2628
2629         return 0;
2630 }
2631
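/*
 * Program gfx ring buffer 0: size, rptr/wptr writeback addresses, base
 * address and doorbell, then kick it off via gfx_v9_0_cp_gfx_start().
 */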
2632 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2633 {
2634         struct amdgpu_ring *ring;
2635         u32 tmp;
2636         u32 rb_bufsz;
2637         u64 rb_addr, rptr_addr, wptr_gpu_addr;
2638
2639         /* Set the write pointer delay */
2640         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2641
2642         /* set the RB to use vmid 0 */
2643         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2644
2645         /* Set ring buffer size */
2646         ring = &adev->gfx.gfx_ring[0];
2647         rb_bufsz = order_base_2(ring->ring_size / 8);
2648         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2649         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2650 #ifdef __BIG_ENDIAN
2651         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2652 #endif
2653         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2654
2655         /* Initialize the ring buffer's write pointers */
2656         ring->wptr = 0;
2657         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2658         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2659
2660         /* set the wb address whether it's enabled or not */
2661         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2662         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2663         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2664
2665         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2666         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2667         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2668
2669         mdelay(1);
2670         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2671
2672         rb_addr = ring->gpu_addr >> 8;
2673         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2674         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2675
2676         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2677         if (ring->use_doorbell) {
2678                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2679                                     DOORBELL_OFFSET, ring->doorbell_index);
2680                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2681                                     DOORBELL_EN, 1);
2682         } else {
2683                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2684         }
2685         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2686
2687         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2688                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
2689         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2690
2691         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2692                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2693
2694
2695         /* start the ring */
2696         gfx_v9_0_cp_gfx_start(adev);
2697         ring->sched.ready = true;
2698
2699         return 0;
2700 }
2701
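/* Halt or release the compute micro engines (MEC1/MEC2). */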
2702 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2703 {
2704         int i;
2705
2706         if (enable) {
2707                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2708         } else {
2709                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2710                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2711                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2712                         adev->gfx.compute_ring[i].sched.ready = false;
2713                 adev->gfx.kiq.ring.sched.ready = false;
2714         }
2715         udelay(50);
2716 }
2717
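/*
 * Legacy (non-PSP) MEC ucode load: point the CPC instruction cache at
 * the MEC image in VRAM and program the MEC1 jump table.
 */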
2718 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2719 {
2720         const struct gfx_firmware_header_v1_0 *mec_hdr;
2721         const __le32 *fw_data;
2722         unsigned i;
2723         u32 tmp;
2724
2725         if (!adev->gfx.mec_fw)
2726                 return -EINVAL;
2727
2728         gfx_v9_0_cp_compute_enable(adev, false);
2729
2730         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2731         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2732
2733         fw_data = (const __le32 *)
2734                 (adev->gfx.mec_fw->data +
2735                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2736         tmp = 0;
2737         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2738         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2739         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2740
2741         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2742                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2743         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2744                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2745
2746         /* MEC1 */
2747         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2748                          mec_hdr->jt_offset);
2749         for (i = 0; i < mec_hdr->jt_size; i++)
2750                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2751                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2752
2753         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2754                         adev->gfx.mec_fw_version);
2755         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2756
2757         return 0;
2758 }
2759
2760 /* KIQ functions */
2761 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2762 {
2763         uint32_t tmp;
2764         struct amdgpu_device *adev = ring->adev;
2765
2766         /* tell RLC which is KIQ queue */
2767         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2768         tmp &= 0xffffff00;
2769         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2770         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2771         tmp |= 0x80;
2772         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2773 }
2774
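/*
 * Map all compute queues through the KIQ: one SET_RESOURCES packet for
 * the queue mask, followed by a MAP_QUEUES packet per compute ring.
 */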
2775 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2776 {
2777         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2778         uint64_t queue_mask = 0;
2779         int r, i;
2780
2781         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2782                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2783                         continue;
2784
2785                 /* This situation may be hit in the future if a new HW
2786                  * generation exposes more than 64 queues. If so, the
2787                  * definition of queue_mask needs updating */
2788                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2789                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2790                         break;
2791                 }
2792
2793                 queue_mask |= (1ull << i);
2794         }
2795
2796         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2797         if (r) {
2798                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2799                 return r;
2800         }
2801
2802         /* set resources */
2803         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2804         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2805                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
2806         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
2807         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
2808         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
2809         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
2810         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
2811         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
2812         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2813                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2814                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2815                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2816
2817                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2818                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
2820                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2821                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2822                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2823                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2824                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2825                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2826                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2827                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2828                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2829                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2830                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2831                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2832                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2833                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2834         }
2835
2836         r = amdgpu_ring_test_helper(kiq_ring);
2837         if (r)
2838                 DRM_ERROR("KCQ enable failed\n");
2839
2840         return r;
2841 }
2842
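/*
 * Fill in the memory queue descriptor (MQD) for a compute/KIQ ring from
 * the current ring state; the HQD registers are programmed from it later.
 */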
2843 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2844 {
2845         struct amdgpu_device *adev = ring->adev;
2846         struct v9_mqd *mqd = ring->mqd_ptr;
2847         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2848         uint32_t tmp;
2849
2850         mqd->header = 0xC0310800;
2851         mqd->compute_pipelinestat_enable = 0x00000001;
2852         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2853         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2854         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2855         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2856         mqd->compute_misc_reserved = 0x00000003;
2857
2858         mqd->dynamic_cu_mask_addr_lo =
2859                 lower_32_bits(ring->mqd_gpu_addr
2860                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2861         mqd->dynamic_cu_mask_addr_hi =
2862                 upper_32_bits(ring->mqd_gpu_addr
2863                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2864
2865         eop_base_addr = ring->eop_gpu_addr >> 8;
2866         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2867         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2868
2869         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2870         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2871         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2872                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2873
2874         mqd->cp_hqd_eop_control = tmp;
2875
2876         /* enable doorbell? */
2877         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2878
2879         if (ring->use_doorbell) {
2880                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2881                                     DOORBELL_OFFSET, ring->doorbell_index);
2882                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2883                                     DOORBELL_EN, 1);
2884                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2885                                     DOORBELL_SOURCE, 0);
2886                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2887                                     DOORBELL_HIT, 0);
2888         } else {
2889                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2890                                          DOORBELL_EN, 0);
2891         }
2892
2893         mqd->cp_hqd_pq_doorbell_control = tmp;
2894
2895         /* disable the queue if it's active */
2896         ring->wptr = 0;
2897         mqd->cp_hqd_dequeue_request = 0;
2898         mqd->cp_hqd_pq_rptr = 0;
2899         mqd->cp_hqd_pq_wptr_lo = 0;
2900         mqd->cp_hqd_pq_wptr_hi = 0;
2901
2902         /* set the pointer to the MQD */
2903         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2904         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2905
2906         /* set MQD vmid to 0 */
2907         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2908         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2909         mqd->cp_mqd_control = tmp;
2910
2911         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2912         hqd_gpu_addr = ring->gpu_addr >> 8;
2913         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2914         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2915
2916         /* set up the HQD, this is similar to CP_RB0_CNTL */
2917         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2918         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2919                             (order_base_2(ring->ring_size / 4) - 1));
2920         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2921                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2922 #ifdef __BIG_ENDIAN
2923         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2924 #endif
2925         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2926         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2927         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2928         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2929         mqd->cp_hqd_pq_control = tmp;
2930
2931         /* set the wb address whether it's enabled or not */
2932         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2933         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2934         mqd->cp_hqd_pq_rptr_report_addr_hi =
2935                 upper_32_bits(wb_gpu_addr) & 0xffff;
2936
2937         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2938         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2939         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2940         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2941
2942         tmp = 0;
2943         /* enable the doorbell if requested */
2944         if (ring->use_doorbell) {
2945                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2946                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2947                                 DOORBELL_OFFSET, ring->doorbell_index);
2948
2949                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2950                                          DOORBELL_EN, 1);
2951                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2952                                          DOORBELL_SOURCE, 0);
2953                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2954                                          DOORBELL_HIT, 0);
2955         }
2956
2957         mqd->cp_hqd_pq_doorbell_control = tmp;
2958
2959         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2960         ring->wptr = 0;
2961         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2962
2963         /* set the vmid for the queue */
2964         mqd->cp_hqd_vmid = 0;
2965
2966         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2967         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2968         mqd->cp_hqd_persistent_state = tmp;
2969
2970         /* set MIN_IB_AVAIL_SIZE */
2971         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2972         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2973         mqd->cp_hqd_ib_control = tmp;
2974
2975         /* activate the queue */
2976         mqd->cp_hqd_active = 1;
2977
2978         return 0;
2979 }
2980
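/*
 * Program the KIQ HQD registers from its MQD; callers hold srbm_mutex
 * with the ring's me/pipe/queue selected.
 */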
2981 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2982 {
2983         struct amdgpu_device *adev = ring->adev;
2984         struct v9_mqd *mqd = ring->mqd_ptr;
2985         int j;
2986
2987         /* disable wptr polling */
2988         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2989
2990         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2991                mqd->cp_hqd_eop_base_addr_lo);
2992         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2993                mqd->cp_hqd_eop_base_addr_hi);
2994
2995         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2996         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2997                mqd->cp_hqd_eop_control);
2998
2999         /* enable doorbell? */
3000         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3001                mqd->cp_hqd_pq_doorbell_control);
3002
3003         /* disable the queue if it's active */
3004         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3005                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3006                 for (j = 0; j < adev->usec_timeout; j++) {
3007                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3008                                 break;
3009                         udelay(1);
3010                 }
3011                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3012                        mqd->cp_hqd_dequeue_request);
3013                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3014                        mqd->cp_hqd_pq_rptr);
3015                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3016                        mqd->cp_hqd_pq_wptr_lo);
3017                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3018                        mqd->cp_hqd_pq_wptr_hi);
3019         }
3020
3021         /* set the pointer to the MQD */
3022         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3023                mqd->cp_mqd_base_addr_lo);
3024         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3025                mqd->cp_mqd_base_addr_hi);
3026
3027         /* set MQD vmid to 0 */
3028         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3029                mqd->cp_mqd_control);
3030
3031         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3032         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3033                mqd->cp_hqd_pq_base_lo);
3034         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3035                mqd->cp_hqd_pq_base_hi);
3036
3037         /* set up the HQD, this is similar to CP_RB0_CNTL */
3038         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3039                mqd->cp_hqd_pq_control);
3040
3041         /* set the wb address whether it's enabled or not */
3042         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3043                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3044         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3045                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3046
3047         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3048         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3049                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3050         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3051                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3052
3053         /* enable the doorbell if requested */
3054         if (ring->use_doorbell) {
3055                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3056                                         (adev->doorbell_index.kiq * 2) << 2);
3057                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3058                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3059         }
3060
3061         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3062                mqd->cp_hqd_pq_doorbell_control);
3063
3064         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3065         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3066                mqd->cp_hqd_pq_wptr_lo);
3067         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3068                mqd->cp_hqd_pq_wptr_hi);
3069
3070         /* set the vmid for the queue */
3071         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3072
3073         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3074                mqd->cp_hqd_persistent_state);
3075
3076         /* activate the queue */
3077         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3078                mqd->cp_hqd_active);
3079
3080         if (ring->use_doorbell)
3081                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3082
3083         return 0;
3084 }
3085
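/*
 * Dequeue the KIQ HQD (with a manual disable if the dequeue request
 * times out) and clear its queue registers.
 */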
3086 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3087 {
3088         struct amdgpu_device *adev = ring->adev;
3089         int j;
3090
3091         /* disable the queue if it's active */
3092         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3093
3094                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3095
3096                 for (j = 0; j < adev->usec_timeout; j++) {
3097                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3098                                 break;
3099                         udelay(1);
3100                 }
3101
3102                 if (j == adev->usec_timeout) {
3103                         DRM_DEBUG("KIQ dequeue request failed.\n");
3104
3105                         /* Manual disable if dequeue request times out */
3106                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3107                 }
3108
3109                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3110                       0);
3111         }
3112
3113         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3114         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3115         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3116         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3117         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3118         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3119         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3120         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3121
3122         return 0;
3123 }
3124
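/*
 * Initialize the KIQ: on GPU reset restore the MQD from the backup copy,
 * otherwise build a fresh MQD and back it up; in both cases the HQD
 * registers are programmed under srbm_mutex.
 */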
3125 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3126 {
3127         struct amdgpu_device *adev = ring->adev;
3128         struct v9_mqd *mqd = ring->mqd_ptr;
3129         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3130
3131         gfx_v9_0_kiq_setting(ring);
3132
3133         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3134                 /* reset MQD to a clean status */
3135                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3136                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3137
3138                 /* reset ring buffer */
3139                 ring->wptr = 0;
3140                 amdgpu_ring_clear_ring(ring);
3141
3142                 mutex_lock(&adev->srbm_mutex);
3143                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3144                 gfx_v9_0_kiq_init_register(ring);
3145                 soc15_grbm_select(adev, 0, 0, 0, 0);
3146                 mutex_unlock(&adev->srbm_mutex);
3147         } else {
3148                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3149                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3150                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3151                 mutex_lock(&adev->srbm_mutex);
3152                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3153                 gfx_v9_0_mqd_init(ring);
3154                 gfx_v9_0_kiq_init_register(ring);
3155                 soc15_grbm_select(adev, 0, 0, 0, 0);
3156                 mutex_unlock(&adev->srbm_mutex);
3157
3158                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3159                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3160         }
3161
3162         return 0;
3163 }
3164
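/*
 * Initialize a compute queue MQD; mirrors gfx_v9_0_kiq_init_queue(), but
 * the HQD registers are programmed by the KIQ via MAP_QUEUES instead.
 */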
3165 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3166 {
3167         struct amdgpu_device *adev = ring->adev;
3168         struct v9_mqd *mqd = ring->mqd_ptr;
3169         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3170
3171         if (!adev->in_gpu_reset && !adev->in_suspend) {
3172                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3173                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3174                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3175                 mutex_lock(&adev->srbm_mutex);
3176                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3177                 gfx_v9_0_mqd_init(ring);
3178                 soc15_grbm_select(adev, 0, 0, 0, 0);
3179                 mutex_unlock(&adev->srbm_mutex);
3180
3181                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3182                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3183         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3184                 /* reset MQD to a clean status */
3185                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3186                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3187
3188                 /* reset ring buffer */
3189                 ring->wptr = 0;
3190                 amdgpu_ring_clear_ring(ring);
3191         } else {
3192                 amdgpu_ring_clear_ring(ring);
3193         }
3194
3195         return 0;
3196 }
3197
3198 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3199 {
3200         struct amdgpu_ring *ring;
3201         int r;
3202
3203         ring = &adev->gfx.kiq.ring;
3204
3205         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3206         if (unlikely(r != 0))
3207                 return r;
3208
3209         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3210         if (unlikely(r != 0))
3211                 return r;
3212
3213         gfx_v9_0_kiq_init_queue(ring);
3214         amdgpu_bo_kunmap(ring->mqd_obj);
3215         ring->mqd_ptr = NULL;
3216         amdgpu_bo_unreserve(ring->mqd_obj);
3217         ring->sched.ready = true;
3218         return 0;
3219 }
3220
3221 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3222 {
3223         struct amdgpu_ring *ring = NULL;
3224         int r = 0, i;
3225
3226         gfx_v9_0_cp_compute_enable(adev, true);
3227
3228         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3229                 ring = &adev->gfx.compute_ring[i];
3230
3231                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3232                 if (unlikely(r != 0))
3233                         goto done;
3234                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3235                 if (!r) {
3236                         r = gfx_v9_0_kcq_init_queue(ring);
3237                         amdgpu_bo_kunmap(ring->mqd_obj);
3238                         ring->mqd_ptr = NULL;
3239                 }
3240                 amdgpu_bo_unreserve(ring->mqd_obj);
3241                 if (r)
3242                         goto done;
3243         }
3244
3245         r = gfx_v9_0_kiq_kcq_enable(adev);
3246 done:
3247         return r;
3248 }
3249
3250 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3251 {
3252         int r, i;
3253         struct amdgpu_ring *ring;
3254
3255         if (!(adev->flags & AMD_IS_APU))
3256                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3257
3258         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3259                 /* legacy firmware loading */
3260                 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3261                 if (r)
3262                         return r;
3263
3264                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3265                 if (r)
3266                         return r;
3267         }
3268
3269         r = gfx_v9_0_kiq_resume(adev);
3270         if (r)
3271                 return r;
3272
3273         r = gfx_v9_0_cp_gfx_resume(adev);
3274         if (r)
3275                 return r;
3276
3277         r = gfx_v9_0_kcq_resume(adev);
3278         if (r)
3279                 return r;
3280
3281         ring = &adev->gfx.gfx_ring[0];
3282         r = amdgpu_ring_test_helper(ring);
3283         if (r)
3284                 return r;
3285
3286         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3287                 ring = &adev->gfx.compute_ring[i];
3288                 amdgpu_ring_test_helper(ring);
3289         }
3290
3291         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3292
3293         return 0;
3294 }
3295
3296 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3297 {
3298         gfx_v9_0_cp_gfx_enable(adev, enable);
3299         gfx_v9_0_cp_compute_enable(adev, enable);
3300 }
3301
3302 static int gfx_v9_0_hw_init(void *handle)
3303 {
3304         int r;
3305         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3306
3307         gfx_v9_0_init_golden_registers(adev);
3308
3309         gfx_v9_0_constants_init(adev);
3310
3311         r = gfx_v9_0_csb_vram_pin(adev);
3312         if (r)
3313                 return r;
3314
3315         r = adev->gfx.rlc.funcs->resume(adev);
3316         if (r)
3317                 return r;
3318
3319         r = gfx_v9_0_cp_resume(adev);
3320         if (r)
3321                 return r;
3322
3323         r = gfx_v9_0_ngg_en(adev);
3324         if (r)
3325                 return r;
3326
3327         return r;
3328 }
3329
3330 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3331 {
3332         int r, i;
3333         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3334
3335         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3336         if (r)
3337                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3338
3339         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3340                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3341
3342                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3343                 amdgpu_ring_write(kiq_ring, /* action: 1 (RESET_QUEUES), queue_sel: 0, engine_sel: 0, num_queues: 1 */
3344                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3345                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3346                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3347                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3348                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3349                 amdgpu_ring_write(kiq_ring, 0);
3350                 amdgpu_ring_write(kiq_ring, 0);
3351                 amdgpu_ring_write(kiq_ring, 0);
3352         }
3353         r = amdgpu_ring_test_helper(kiq_ring);
3354         if (r)
3355                 DRM_ERROR("KCQ disable failed\n");
3356
3357         return r;
3358 }
3359
3360 static int gfx_v9_0_hw_fini(void *handle)
3361 {
3362         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3363
3364         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3365         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3366         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3367
3368         /* disable the KCQs so the CPC stops touching memory that is about to become invalid */
3369         gfx_v9_0_kcq_disable(adev);
3370
3371         if (amdgpu_sriov_vf(adev)) {
3372                 gfx_v9_0_cp_gfx_enable(adev, false);
3373                 /* must disable wptr polling for SR-IOV once the hw is finished,
3374                  * otherwise the CPC engine may keep fetching the WB address, which
3375                  * is no longer valid after sw teardown, and trigger DMAR read
3376                  * errors on the hypervisor side.
3377                  */
3378                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3379                 return 0;
3380         }
3381
3382         /* Use the deinitialize sequence from CAIL when unbinding the device
3383          * from the driver, otherwise the KIQ hangs when binding it back.
3384          */
3385         if (!adev->in_gpu_reset && !adev->in_suspend) {
3386                 mutex_lock(&adev->srbm_mutex);
3387                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3388                                 adev->gfx.kiq.ring.pipe,
3389                                 adev->gfx.kiq.ring.queue, 0);
3390                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3391                 soc15_grbm_select(adev, 0, 0, 0, 0);
3392                 mutex_unlock(&adev->srbm_mutex);
3393         }
3394
3395         gfx_v9_0_cp_enable(adev, false);
3396         adev->gfx.rlc.funcs->stop(adev);
3397
3398         gfx_v9_0_csb_vram_unpin(adev);
3399
3400         return 0;
3401 }
3402
3403 static int gfx_v9_0_suspend(void *handle)
3404 {
3405         return gfx_v9_0_hw_fini(handle);
3406 }
3407
3408 static int gfx_v9_0_resume(void *handle)
3409 {
3410         return gfx_v9_0_hw_init(handle);
3411 }
3412
3413 static bool gfx_v9_0_is_idle(void *handle)
3414 {
3415         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3416
3417         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3418                                 GRBM_STATUS, GUI_ACTIVE))
3419                 return false;
3420         else
3421                 return true;
3422 }
3423
3424 static int gfx_v9_0_wait_for_idle(void *handle)
3425 {
3426         unsigned i;
3427         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3428
3429         for (i = 0; i < adev->usec_timeout; i++) {
3430                 if (gfx_v9_0_is_idle(handle))
3431                         return 0;
3432                 udelay(1);
3433         }
3434         return -ETIMEDOUT;
3435 }
3436
3437 static int gfx_v9_0_soft_reset(void *handle)
3438 {
3439         u32 grbm_soft_reset = 0;
3440         u32 tmp;
3441         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3442
3443         /* GRBM_STATUS */
3444         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3445         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3446                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3447                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3448                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3449                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3450                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3451                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3452                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3453                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3454                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3455         }
3456
3457         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3458                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3459                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3460         }
3461
3462         /* GRBM_STATUS2 */
3463         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3464         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3465                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3466                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3467
3468
3469         if (grbm_soft_reset) {
3470                 /* stop the rlc */
3471                 adev->gfx.rlc.funcs->stop(adev);
3472
3473                 /* Disable GFX parsing/prefetching */
3474                 gfx_v9_0_cp_gfx_enable(adev, false);
3475
3476                 /* Disable MEC parsing/prefetching */
3477                 gfx_v9_0_cp_compute_enable(adev, false);
3478
3479                 if (grbm_soft_reset) {
3480                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3481                         tmp |= grbm_soft_reset;
3482                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3483                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3484                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3485
3486                         udelay(50);
3487
3488                         tmp &= ~grbm_soft_reset;
3489                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3490                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3491                 }
3492
3493                 /* Wait a little for things to settle down */
3494                 udelay(50);
3495         }
3496         return 0;
3497 }
3498
3499 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3500 {
3501         uint64_t clock;
3502
3503         mutex_lock(&adev->gfx.gpu_clock_mutex);
3504         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3505         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3506                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3507         mutex_unlock(&adev->gfx.gpu_clock_mutex);
3508         return clock;
3509 }
3510
3511 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3512                                           uint32_t vmid,
3513                                           uint32_t gds_base, uint32_t gds_size,
3514                                           uint32_t gws_base, uint32_t gws_size,
3515                                           uint32_t oa_base, uint32_t oa_size)
3516 {
3517         struct amdgpu_device *adev = ring->adev;
3518
3519         /* GDS Base */
3520         gfx_v9_0_write_data_to_reg(ring, 0, false,
3521                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3522                                    gds_base);
3523
3524         /* GDS Size */
3525         gfx_v9_0_write_data_to_reg(ring, 0, false,
3526                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3527                                    gds_size);
3528
3529         /* GWS */
3530         gfx_v9_0_write_data_to_reg(ring, 0, false,
3531                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3532                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3533
3534         /* OA */
3535         gfx_v9_0_write_data_to_reg(ring, 0, false,
3536                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3537                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
3538 }
3539
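/*
 * Hand-assembled GFX9 compute shaders used by the EDC GPR workarounds
 * below to initialize all VGPRs/SGPRs.
 */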
3540 static const u32 vgpr_init_compute_shader[] =
3541 {
3542         0xb07c0000, 0xbe8000ff,
3543         0x000000f8, 0xbf110800,
3544         0x7e000280, 0x7e020280,
3545         0x7e040280, 0x7e060280,
3546         0x7e080280, 0x7e0a0280,
3547         0x7e0c0280, 0x7e0e0280,
3548         0x80808800, 0xbe803200,
3549         0xbf84fff5, 0xbf9c0000,
3550         0xd28c0001, 0x0001007f,
3551         0xd28d0001, 0x0002027e,
3552         0x10020288, 0xb8810904,
3553         0xb7814000, 0xd1196a01,
3554         0x00000301, 0xbe800087,
3555         0xbefc00c1, 0xd89c4000,
3556         0x00020201, 0xd89cc080,
3557         0x00040401, 0x320202ff,
3558         0x00000800, 0x80808100,
3559         0xbf84fff8, 0x7e020280,
3560         0xbf810000, 0x00000000,
3561 };
3562
3563 static const u32 sgpr_init_compute_shader[] =
3564 {
3565         0xb07c0000, 0xbe8000ff,
3566         0x0000005f, 0xbee50080,
3567         0xbe812c65, 0xbe822c65,
3568         0xbe832c65, 0xbe842c65,
3569         0xbe852c65, 0xb77c0005,
3570         0x80808500, 0xbf84fff8,
3571         0xbe800080, 0xbf810000,
3572 };
3573
3574 static const struct soc15_reg_entry vgpr_init_regs[] = {
3575    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3576    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3577    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3578    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3579    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3580    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3581    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3582    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3583    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3584    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3585 };
3586
3587 static const struct soc15_reg_entry sgpr_init_regs[] = {
3588    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3589    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3590    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3591    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3592    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3593    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3594    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3595    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3596    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 SGPRs) */
3597    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3598 };
3599
3600 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3601    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3602    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
3603    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
3604    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
3605    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
3606    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
3607    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
3608    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
3609    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
3610    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
3611    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
3612    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
3613    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
3614    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
3615    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
3616    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
3617    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
3618    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
3619    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
3620    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
3621    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
3622    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
3623    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
3624    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
3625    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
3626    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
3627    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
3628    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
3629    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
3630    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
3631    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
3632    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
3633 };
3634
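/*
 * EDC workaround: use a DMA_DATA packet to write across the entire GDS
 * aperture through VMID0 so GDS starts out initialized.
 */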
3635 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
3636 {
3637         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3638         int i, r;
3639
3640         r = amdgpu_ring_alloc(ring, 7);
3641         if (r) {
3642                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
3643                         ring->name, r);
3644                 return r;
3645         }
3646
3647         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
3648         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
3649
3650         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3651         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
3652                                 PACKET3_DMA_DATA_DST_SEL(1) |
3653                                 PACKET3_DMA_DATA_SRC_SEL(2) |
3654                                 PACKET3_DMA_DATA_ENGINE(0)));
3655         amdgpu_ring_write(ring, 0);
3656         amdgpu_ring_write(ring, 0);
3657         amdgpu_ring_write(ring, 0);
3658         amdgpu_ring_write(ring, 0);
3659         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
3660                                 adev->gds.gds_size);
3661
3662         amdgpu_ring_commit(ring);
3663
3664         for (i = 0; i < adev->usec_timeout; i++) {
3665                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
3666                         break;
3667                 udelay(1);
3668         }
3669
3670         if (i >= adev->usec_timeout)
3671                 r = -ETIMEDOUT;
3672
3673         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
3674
3675         return r;
3676 }
3677
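/*
 * EDC workaround, only when GFX RAS is enabled: build an IB that sets up
 * and dispatches the VGPR and SGPR init shaders above so that every GPR
 * gets written once.
 */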
3678 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3679 {
3680         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3681         struct amdgpu_ib ib;
3682         struct dma_fence *f = NULL;
3683         int r, i, j, k;
3684         unsigned total_size, vgpr_offset, sgpr_offset;
3685         u64 gpu_addr;
3686
3687         /* only support when RAS is enabled */
3688         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3689                 return 0;
3690
3691         /* bail if the compute ring is not ready */
3692         if (!ring->sched.ready)
3693                 return 0;
3694
3695         total_size =
3696                 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3697         total_size +=
3698                 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3699         total_size = ALIGN(total_size, 256);
3700         vgpr_offset = total_size;
3701         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3702         sgpr_offset = total_size;
3703         total_size += sizeof(sgpr_init_compute_shader);
3704
3705         /* allocate an indirect buffer to put the commands in */
3706         memset(&ib, 0, sizeof(ib));
3707         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3708         if (r) {
3709                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3710                 return r;
3711         }
3712
3713         /* load the compute shaders */
3714         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3715                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3716
3717         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3718                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3719
3720         /* init the ib length to 0 */
3721         ib.length_dw = 0;
3722
3723         /* VGPR */
3724         /* write the register state for the compute dispatch */
3725         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3726                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3727                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3728                                                                 - PACKET3_SET_SH_REG_START;
3729                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3730         }
3731         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3732         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3733         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3734         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3735                                                         - PACKET3_SET_SH_REG_START;
3736         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3737         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3738
3739         /* write dispatch packet */
3740         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3741         ib.ptr[ib.length_dw++] = 128; /* x */
3742         ib.ptr[ib.length_dw++] = 1; /* y */
3743         ib.ptr[ib.length_dw++] = 1; /* z */
3744         ib.ptr[ib.length_dw++] =
3745                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3746
3747         /* write CS partial flush packet */
3748         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3749         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3750
3751         /* SGPR */
3752         /* write the register state for the compute dispatch */
3753         for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3754                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3755                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3756                                                                 - PACKET3_SET_SH_REG_START;
3757                 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3758         }
3759         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3760         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3761         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3762         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3763                                                         - PACKET3_SET_SH_REG_START;
3764         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3765         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3766
3767         /* write dispatch packet */
3768         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3769         ib.ptr[ib.length_dw++] = 128; /* x */
3770         ib.ptr[ib.length_dw++] = 1; /* y */
3771         ib.ptr[ib.length_dw++] = 1; /* z */
3772         ib.ptr[ib.length_dw++] =
3773                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3774
3775         /* write CS partial flush packet */
3776         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3777         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3778
3779         /* schedule the ib on the ring */
3780         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3781         if (r) {
3782                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3783                 goto fail;
3784         }
3785
3786         /* wait for the GPU to finish processing the IB */
3787         r = dma_fence_wait(f, false);
3788         if (r) {
3789                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3790                 goto fail;
3791         }
3792
3793         /* read back registers to clear the counters */
3794         mutex_lock(&adev->grbm_idx_mutex);
3795         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
3796                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
3797                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
3798                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
3799                                 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3800                         }
3801                 }
3802         }
3803         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3804         mutex_unlock(&adev->grbm_idx_mutex);
3805
3806 fail:
3807         amdgpu_ib_free(adev, &ib, NULL);
3808         dma_fence_put(f);
3809
3810         return r;
3811 }
3812
3813 static int gfx_v9_0_early_init(void *handle)
3814 {
3815         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3816
3817         adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3818         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3819         gfx_v9_0_set_ring_funcs(adev);
3820         gfx_v9_0_set_irq_funcs(adev);
3821         gfx_v9_0_set_gds_init(adev);
3822         gfx_v9_0_set_rlc_funcs(adev);
3823
3824         return 0;
3825 }
3826
3827 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3828                 struct amdgpu_iv_entry *entry);
3829
3830 static int gfx_v9_0_ecc_late_init(void *handle)
3831 {
3832         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3833         struct ras_common_if **ras_if = &adev->gfx.ras_if;
3834         struct ras_ih_if ih_info = {
3835                 .cb = gfx_v9_0_process_ras_data_cb,
3836         };
3837         struct ras_fs_if fs_info = {
3838                 .sysfs_name = "gfx_err_count",
3839                 .debugfs_name = "gfx_err_inject",
3840         };
3841         struct ras_common_if ras_block = {
3842                 .block = AMDGPU_RAS_BLOCK__GFX,
3843                 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3844                 .sub_block_index = 0,
3845                 .name = "gfx",
3846         };
3847         int r;
3848
3849         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3850                 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3851                 return 0;
3852         }
3853
3854         r = gfx_v9_0_do_edc_gds_workarounds(adev);
3855         if (r)
3856                 return r;
3857
3858         /* requires IBs so do in late init after IB pool is initialized */
3859         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3860         if (r)
3861                 return r;
3862
3863         /* handle resume path. */
3864         if (*ras_if) {
3865                 /* re-send the RAS TA enable command during resume,
3866                  * and be prepared to handle failure.
3867                  */
3868                 ih_info.head = **ras_if;
3869                 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3870                 if (r) {
3871                         if (r == -EAGAIN) {
3872                                 /* request a gpu reset; this will be run again after the reset. */
3873                                 amdgpu_ras_request_reset_on_boot(adev,
3874                                                 AMDGPU_RAS_BLOCK__GFX);
3875                                 return 0;
3876                         }
3877                         /* failed to enable RAS, clean everything up. */
3878                         goto irq;
3879                 }
3880                 /* enabled successfully, continue. */
3881                 goto resume;
3882         }
3883
3884         *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3885         if (!*ras_if)
3886                 return -ENOMEM;
3887
3888         **ras_if = ras_block;
3889
3890         r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3891         if (r) {
3892                 if (r == -EAGAIN) {
3893                         amdgpu_ras_request_reset_on_boot(adev,
3894                                         AMDGPU_RAS_BLOCK__GFX);
3895                         r = 0;
3896                 }
3897                 goto feature;
3898         }
3899
3900         ih_info.head = **ras_if;
3901         fs_info.head = **ras_if;
3902
3903         r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3904         if (r)
3905                 goto interrupt;
3906
3907         amdgpu_ras_debugfs_create(adev, &fs_info);
3908
3909         r = amdgpu_ras_sysfs_create(adev, &fs_info);
3910         if (r)
3911                 goto sysfs;
3912 resume:
3913         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3914         if (r)
3915                 goto irq;
3916
3917         return 0;
3918 irq:
3919         amdgpu_ras_sysfs_remove(adev, *ras_if);
3920 sysfs:
3921         amdgpu_ras_debugfs_remove(adev, *ras_if);
3922         amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3923 interrupt:
3924         amdgpu_ras_feature_enable(adev, *ras_if, 0);
3925 feature:
3926         kfree(*ras_if);
3927         *ras_if = NULL;
3928         return r;
3929 }
3930
3931 static int gfx_v9_0_late_init(void *handle)
3932 {
3933         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3934         int r;
3935
3936         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3937         if (r)
3938                 return r;
3939
3940         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3941         if (r)
3942                 return r;
3943
3944         r = gfx_v9_0_ecc_late_init(handle);
3945         if (r)
3946                 return r;
3947
3948         return 0;
3949 }
3950
3951 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3952 {
3953         uint32_t rlc_setting;
3954
3955         /* if RLC is not enabled, do nothing */
3956         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3957         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3958                 return false;
3959
3960         return true;
3961 }
3962
3963 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3964 {
3965         uint32_t data;
3966         unsigned i;
3967
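        /* request entry into safe mode: MESSAGE = 1 (enter), CMD triggers the request */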
3968         data = RLC_SAFE_MODE__CMD_MASK;
3969         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3970         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3971
3972         /* wait for RLC_SAFE_MODE */
3973         for (i = 0; i < adev->usec_timeout; i++) {
3974                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3975                         break;
3976                 udelay(1);
3977         }
3978 }
3979
3980 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3981 {
3982         uint32_t data;
3983
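        /* MESSAGE is left at 0, which requests leaving safe mode */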
3984         data = RLC_SAFE_MODE__CMD_MASK;
3985         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3986 }
3987
3988 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3989                                                 bool enable)
3990 {
3991         amdgpu_gfx_rlc_enter_safe_mode(adev);
3992
3993         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3994                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3995                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3996                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3997         } else {
3998                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3999                 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4000         }
4001
4002         amdgpu_gfx_rlc_exit_safe_mode(adev);
4003 }
4004
4005 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4006                                                 bool enable)
4007 {
4008         /* TODO: double check if we need to perform under safe mode */
4009         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4010
4011         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4012                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4013         else
4014                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4015
4016         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4017                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4018         else
4019                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4020
4021         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4022 }
4023
4024 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4025                                                       bool enable)
4026 {
4027         uint32_t data, def;
4028
4029         amdgpu_gfx_rlc_enter_safe_mode(adev);
4030
4031         /* It is disabled by HW by default */
4032         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4033                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4034                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4035
4036                 if (adev->asic_type != CHIP_VEGA12)
4037                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4038
4039                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4040                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4041                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4042
4043                 /* only for Vega10 & Raven1 */
4044                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4045
4046                 if (def != data)
4047                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4048
4049                 /* MGLS is a global flag to control all MGLS in GFX */
4050                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4051                         /* 2 - RLC memory Light sleep */
4052                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4053                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4054                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4055                                 if (def != data)
4056                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4057                         }
4058                         /* 3 - CP memory Light sleep */
4059                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4060                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4061                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4062                                 if (def != data)
4063                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4064                         }
4065                 }
4066         } else {
4067                 /* 1 - MGCG_OVERRIDE */
4068                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4069
4070                 if (adev->asic_type != CHIP_VEGA12)
4071                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4072
4073                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4074                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4075                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4076                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4077
4078                 if (def != data)
4079                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4080
4081                 /* 2 - disable MGLS in RLC */
4082                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4083                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4084                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4085                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4086                 }
4087
4088                 /* 3 - disable MGLS in CP */
4089                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4090                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4091                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4092                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4093                 }
4094         }
4095
4096         amdgpu_gfx_rlc_exit_safe_mode(adev);
4097 }
4098
4099 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4100                                            bool enable)
4101 {
4102         uint32_t data, def;
4103
4104         amdgpu_gfx_rlc_enter_safe_mode(adev);
4105
4106         /* Enable 3D CGCG/CGLS */
4107         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4108                 /* write cmd to clear the cgcg/cgls override */
4109                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4110                 /* unset CGCG override */
4111                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4112                 /* update CGCG and CGLS override bits */
4113                 if (def != data)
4114                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4115
4116                 /* enable 3Dcgcg FSM(0x0000363f) */
4117                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4118
4119                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4120                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4121                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4122                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4123                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4124                 if (def != data)
4125                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4126
4127                 /* set IDLE_POLL_COUNT(0x00900100) */
4128                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4129                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4130                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4131                 if (def != data)
4132                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4133         } else {
4134                 /* Disable CGCG/CGLS */
4135                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4136                 /* disable cgcg, cgls should be disabled */
4137                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4138                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4139                 /* disable cgcg and cgls in FSM */
4140                 if (def != data)
4141                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4142         }
4143
4144         amdgpu_gfx_rlc_exit_safe_mode(adev);
4145 }
4146
4147 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4148                                                       bool enable)
4149 {
4150         uint32_t def, data;
4151
4152         amdgpu_gfx_rlc_enter_safe_mode(adev);
4153
4154         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4155                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4156                 /* unset CGCG override */
4157                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4158                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4159                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4160                 else
4161                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4162                 /* update CGCG and CGLS override bits */
4163                 if (def != data)
4164                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4165
4166                 /* enable cgcg FSM(0x0000363F) */
4167                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4168
4169                 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4170                         RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4171                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4172                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4173                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4174                 if (def != data)
4175                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4176
4177                 /* set IDLE_POLL_COUNT(0x00900100) */
4178                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4179                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4180                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4181                 if (def != data)
4182                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4183         } else {
4184                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4185                 /* reset CGCG/CGLS bits */
4186                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4187                 /* disable cgcg and cgls in FSM */
4188                 if (def != data)
4189                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4190         }
4191
4192         amdgpu_gfx_rlc_exit_safe_mode(adev);
4193 }
4194
4195 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4196                                             bool enable)
4197 {
4198         if (enable) {
4199                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4200                  * ===  MGCG + MGLS ===
4201                  */
4202                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4203                 /* ===  CGCG /CGLS for GFX 3D Only === */
4204                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4205                 /* ===  CGCG + CGLS === */
4206                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4207         } else {
4208                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4209                  * ===  CGCG + CGLS ===
4210                  */
4211                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4212                 /* ===  CGCG /CGLS for GFX 3D Only === */
4213                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4214                 /* ===  MGCG + MGLS === */
4215                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4216         }
4217         return 0;
4218 }
4219
4220 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4221         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4222         .set_safe_mode = gfx_v9_0_set_safe_mode,
4223         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4224         .init = gfx_v9_0_rlc_init,
4225         .get_csb_size = gfx_v9_0_get_csb_size,
4226         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4227         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4228         .resume = gfx_v9_0_rlc_resume,
4229         .stop = gfx_v9_0_rlc_stop,
4230         .reset = gfx_v9_0_rlc_reset,
4231         .start = gfx_v9_0_rlc_start
4232 };
4233
4234 static int gfx_v9_0_set_powergating_state(void *handle,
4235                                           enum amd_powergating_state state)
4236 {
4237         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4238         bool enable = (state == AMD_PG_STATE_GATE);
4239
4240         switch (adev->asic_type) {
4241         case CHIP_RAVEN:
4242                 if (!enable) {
4243                         amdgpu_gfx_off_ctrl(adev, false);
4244                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4245                 }
4246                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4247                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4248                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4249                 } else {
4250                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4251                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4252                 }
4253
4254                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4255                         gfx_v9_0_enable_cp_power_gating(adev, true);
4256                 else
4257                         gfx_v9_0_enable_cp_power_gating(adev, false);
4258
4259                 /* update gfx cgpg state */
4260                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4261
4262                 /* update mgcg state */
4263                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4264
4265                 if (enable)
4266                         amdgpu_gfx_off_ctrl(adev, true);
4267                 break;
4268         case CHIP_VEGA12:
4269                 if (!enable) {
4270                         amdgpu_gfx_off_ctrl(adev, false);
4271                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4272                 } else {
4273                         amdgpu_gfx_off_ctrl(adev, true);
4274                 }
4275                 break;
4276         default:
4277                 break;
4278         }
4279
4280         return 0;
4281 }
4282
4283 static int gfx_v9_0_set_clockgating_state(void *handle,
4284                                           enum amd_clockgating_state state)
4285 {
4286         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4287
4288         if (amdgpu_sriov_vf(adev))
4289                 return 0;
4290
4291         switch (adev->asic_type) {
4292         case CHIP_VEGA10:
4293         case CHIP_VEGA12:
4294         case CHIP_VEGA20:
4295         case CHIP_RAVEN:
4296                 gfx_v9_0_update_gfx_clock_gating(adev,
4297                                                  state == AMD_CG_STATE_GATE);
4298                 break;
4299         default:
4300                 break;
4301         }
4302         return 0;
4303 }
4304
4305 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4306 {
4307         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4308         int data;
4309
4310         if (amdgpu_sriov_vf(adev))
4311                 *flags = 0;
4312
4313         /* AMD_CG_SUPPORT_GFX_MGCG */
4314         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4315         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4316                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4317
4318         /* AMD_CG_SUPPORT_GFX_CGCG */
4319         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4320         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4321                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4322
4323         /* AMD_CG_SUPPORT_GFX_CGLS */
4324         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4325                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4326
4327         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4328         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4329         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4330                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4331
4332         /* AMD_CG_SUPPORT_GFX_CP_LS */
4333         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4334         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4335                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4336
4337         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4338         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4339         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4340                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4341
4342         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4343         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4344                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4345 }
4346
4347 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4348 {
4349         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
4350 }
4351
4352 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4353 {
4354         struct amdgpu_device *adev = ring->adev;
4355         u64 wptr;
4356
4357         /* XXX check if swapping is necessary on BE */
4358         if (ring->use_doorbell) {
4359                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4360         } else {
4361                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4362                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4363         }
4364
4365         return wptr;
4366 }
4367
4368 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4369 {
4370         struct amdgpu_device *adev = ring->adev;
4371
4372         if (ring->use_doorbell) {
4373                 /* XXX check if swapping is necessary on BE */
4374                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4375                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4376         } else {
4377                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4378                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4379         }
4380 }
4381
4382 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4383 {
4384         struct amdgpu_device *adev = ring->adev;
4385         u32 ref_and_mask, reg_mem_engine;
4386         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4387
4388         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4389                 switch (ring->me) {
4390                 case 1:
4391                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4392                         break;
4393                 case 2:
4394                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4395                         break;
4396                 default:
4397                         return;
4398                 }
4399                 reg_mem_engine = 0;
4400         } else {
4401                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4402                 reg_mem_engine = 1; /* pfp */
4403         }
4404
4405         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4406                               adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4407                               adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4408                               ref_and_mask, ref_and_mask, 0x20);
4409 }
4410
4411 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4412                                         struct amdgpu_job *job,
4413                                         struct amdgpu_ib *ib,
4414                                         uint32_t flags)
4415 {
4416         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4417         u32 header, control = 0;
4418
4419         if (ib->flags & AMDGPU_IB_FLAG_CE)
4420                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4421         else
4422                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4423
4424         control |= ib->length_dw | (vmid << 24);
4425
4426         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4427                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4428
4429                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4430                         gfx_v9_0_ring_emit_de_meta(ring);
4431         }
4432
4433         amdgpu_ring_write(ring, header);
4434         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4435         amdgpu_ring_write(ring,
4436 #ifdef __BIG_ENDIAN
4437                 (2 << 0) |
4438 #endif
4439                 lower_32_bits(ib->gpu_addr));
4440         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4441         amdgpu_ring_write(ring, control);
4442 }
4443
4444 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4445                                           struct amdgpu_job *job,
4446                                           struct amdgpu_ib *ib,
4447                                           uint32_t flags)
4448 {
4449         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4450         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4451
4452         /* Currently, there is a high probability of a wave ID mismatch
4453          * between ME and GDS, leading to a hw deadlock, because ME generates
4454          * different wave IDs than the GDS expects. This situation happens
4455          * randomly when at least 5 compute pipes use GDS ordered append.
4456          * The wave IDs generated by ME are also wrong after suspend/resume.
4457          * Those are probably bugs somewhere else in the kernel driver.
4458          *
4459          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4460          * GDS to 0 for this ring (me/pipe).
4461          */
4462         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4463                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4464                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4465                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4466         }
4467
4468         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4469         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4470         amdgpu_ring_write(ring,
4471 #ifdef __BIG_ENDIAN
4472                                 (2 << 0) |
4473 #endif
4474                                 lower_32_bits(ib->gpu_addr));
4475         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4476         amdgpu_ring_write(ring, control);
4477 }
4478
4479 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4480                                      u64 seq, unsigned flags)
4481 {
4482         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4483         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4484         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4485
4486         /* RELEASE_MEM - flush caches, send int */
4487         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4488         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4489                                                EOP_TC_NC_ACTION_EN) :
4490                                               (EOP_TCL1_ACTION_EN |
4491                                                EOP_TC_ACTION_EN |
4492                                                EOP_TC_WB_ACTION_EN |
4493                                                EOP_TC_MD_ACTION_EN)) |
4494                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4495                                  EVENT_INDEX(5)));
4496         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4497
4498         /*
4499          * the address should be Qword aligned if 64bit write, Dword
4500          * aligned if only send 32bit data low (discard data high)
4501          */
4502         if (write64bit)
4503                 BUG_ON(addr & 0x7);
4504         else
4505                 BUG_ON(addr & 0x3);
4506         amdgpu_ring_write(ring, lower_32_bits(addr));
4507         amdgpu_ring_write(ring, upper_32_bits(addr));
4508         amdgpu_ring_write(ring, lower_32_bits(seq));
4509         amdgpu_ring_write(ring, upper_32_bits(seq));
4510         amdgpu_ring_write(ring, 0);
4511 }
4512
4513 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4514 {
4515         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4516         uint32_t seq = ring->fence_drv.sync_seq;
4517         uint64_t addr = ring->fence_drv.gpu_addr;
4518
4519         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4520                               lower_32_bits(addr), upper_32_bits(addr),
4521                               seq, 0xffffffff, 4);
4522 }
4523
4524 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4525                                         unsigned vmid, uint64_t pd_addr)
4526 {
4527         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4528
4529         /* compute doesn't have PFP */
4530         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4531                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4532                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4533                 amdgpu_ring_write(ring, 0x0);
4534         }
4535 }
4536
4537 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4538 {
4539         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4540 }
4541
4542 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4543 {
4544         u64 wptr;
4545
4546         /* XXX check if swapping is necessary on BE */
4547         if (ring->use_doorbell)
4548                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4549         else
4550                 BUG();
4551         return wptr;
4552 }
4553
4554 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4555                                            bool acquire)
4556 {
4557         struct amdgpu_device *adev = ring->adev;
4558         int pipe_num, tmp, reg;
4559         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4560
4561         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4562
4563         /* first me only has 2 entries, GFX and HP3D */
4564         if (ring->me > 0)
4565                 pipe_num -= 2;
4566
4567         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4568         tmp = RREG32(reg);
4569         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4570         WREG32(reg, tmp);
4571 }
4572
4573 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4574                                             struct amdgpu_ring *ring,
4575                                             bool acquire)
4576 {
4577         int i, pipe;
4578         bool reserve;
4579         struct amdgpu_ring *iring;
4580
4581         mutex_lock(&adev->gfx.pipe_reserve_mutex);
4582         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4583         if (acquire)
4584                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4585         else
4586                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4587
4588         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4589                 /* Clear all reservations - everyone reacquires all resources */
4590                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4591                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4592                                                        true);
4593
4594                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4595                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4596                                                        true);
4597         } else {
4598                 /* Lower all pipes without a current reservation */
4599                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4600                         iring = &adev->gfx.gfx_ring[i];
4601                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4602                                                            iring->me,
4603                                                            iring->pipe,
4604                                                            0);
4605                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4606                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4607                 }
4608
4609                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4610                         iring = &adev->gfx.compute_ring[i];
4611                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4612                                                            iring->me,
4613                                                            iring->pipe,
4614                                                            0);
4615                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4616                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4617                 }
4618         }
4619
4620         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4621 }
4622
4623 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4624                                       struct amdgpu_ring *ring,
4625                                       bool acquire)
4626 {
4627         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4628         uint32_t queue_priority = acquire ? 0xf : 0x0;
4629
4630         mutex_lock(&adev->srbm_mutex);
4631         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4632
4633         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4634         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4635
4636         soc15_grbm_select(adev, 0, 0, 0, 0);
4637         mutex_unlock(&adev->srbm_mutex);
4638 }
4639
4640 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4641                                                enum drm_sched_priority priority)
4642 {
4643         struct amdgpu_device *adev = ring->adev;
4644         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4645
4646         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4647                 return;
4648
4649         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4650         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4651 }
4652
4653 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4654 {
4655         struct amdgpu_device *adev = ring->adev;
4656
4657         /* XXX check if swapping is necessary on BE */
4658         if (ring->use_doorbell) {
4659                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4660                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4661         } else {
4662                 BUG(); /* only DOORBELL method supported on gfx9 now */
4663         }
4664 }
4665
4666 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4667                                          u64 seq, unsigned int flags)
4668 {
4669         struct amdgpu_device *adev = ring->adev;
4670
4671         /* we only allocate 32bit for each seq wb address */
4672         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4673
4674         /* write fence seq to the "addr" */
4675         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4676         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4677                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4678         amdgpu_ring_write(ring, lower_32_bits(addr));
4679         amdgpu_ring_write(ring, upper_32_bits(addr));
4680         amdgpu_ring_write(ring, lower_32_bits(seq));
4681
4682         if (flags & AMDGPU_FENCE_FLAG_INT) {
4683                 /* set register to trigger INT */
4684                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4685                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4686                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4687                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4688                 amdgpu_ring_write(ring, 0);
4689                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4690         }
4691 }
4692
4693 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4694 {
4695         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4696         amdgpu_ring_write(ring, 0);
4697 }
4698
4699 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4700 {
4701         struct v9_ce_ib_state ce_payload = {0};
4702         uint64_t csa_addr;
4703         int cnt;
4704
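        /* WRITE_DATA payload is control + dst addr lo/hi + the CE metadata;
         * the PACKET3 count field is that payload length minus one
         */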
4705         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4706         csa_addr = amdgpu_csa_vaddr(ring->adev);
4707
4708         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4709         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4710                                  WRITE_DATA_DST_SEL(8) |
4711                                  WR_CONFIRM) |
4712                                  WRITE_DATA_CACHE_POLICY(0));
4713         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4714         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4715         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4716 }
4717
4718 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4719 {
4720         struct v9_de_ib_state de_payload = {0};
4721         uint64_t csa_addr, gds_addr;
4722         int cnt;
4723
4724         csa_addr = amdgpu_csa_vaddr(ring->adev);
4725         gds_addr = csa_addr + 4096;
4726         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4727         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4728
4729         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4730         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4731         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4732                                  WRITE_DATA_DST_SEL(8) |
4733                                  WR_CONFIRM) |
4734                                  WRITE_DATA_CACHE_POLICY(0));
4735         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4736         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4737         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4738 }
4739
4740 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4741 {
4742         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4743         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0: frame start, 1: frame end */
4744 }
4745
4746 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4747 {
4748         uint32_t dw2 = 0;
4749
4750         if (amdgpu_sriov_vf(ring->adev))
4751                 gfx_v9_0_ring_emit_ce_meta(ring);
4752
4753         gfx_v9_0_ring_emit_tmz(ring, true);
4754
4755         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
4756         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4757                 /* set load_global_config & load_global_uconfig */
4758                 dw2 |= 0x8001;
4759                 /* set load_cs_sh_regs */
4760                 dw2 |= 0x01000000;
4761                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4762                 dw2 |= 0x10002;
4763
4764                 /* set load_ce_ram if a preamble is present */
4765                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4766                         dw2 |= 0x10000000;
4767         } else {
4768                 /* still load_ce_ram if this is the first time a preamble is presented,
4769                  * even though no context switch happens.
4770                  */
4771                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4772                         dw2 |= 0x10000000;
4773         }
4774
4775         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4776         amdgpu_ring_write(ring, dw2);
4777         amdgpu_ring_write(ring, 0);
4778 }
4779
4780 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4781 {
4782         unsigned ret;
4783         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4784         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4785         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4786         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4787         ret = ring->wptr & ring->buf_mask;
4788         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4789         return ret;
4790 }
4791
4792 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4793 {
4794         unsigned cur;
4795         BUG_ON(offset > ring->buf_mask);
4796         BUG_ON(ring->ring[offset] != 0x55aa55aa);
4797
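        /* patch the COND_EXEC placeholder with the number of DWs emitted
         * after it, taking a possible ring buffer wrap into account
         */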
4798         cur = (ring->wptr & ring->buf_mask) - 1;
4799         if (likely(cur > offset))
4800                 ring->ring[offset] = cur - offset;
4801         else
4802                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4803 }
4804
4805 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4806 {
4807         struct amdgpu_device *adev = ring->adev;
4808
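        /* COPY_DATA the register value into the writeback buffer slot
         * (virt.reg_val_offs) where the KIQ register-read path picks it up
         */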
4809         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4810         amdgpu_ring_write(ring, 0 |     /* src: register*/
4811                                 (5 << 8) |      /* dst: memory */
4812                                 (1 << 20));     /* write confirm */
4813         amdgpu_ring_write(ring, reg);
4814         amdgpu_ring_write(ring, 0);
4815         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4816                                 adev->virt.reg_val_offs * 4));
4817         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4818                                 adev->virt.reg_val_offs * 4));
4819 }
4820
4821 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4822                                     uint32_t val)
4823 {
4824         uint32_t cmd = 0;
4825
4826         switch (ring->funcs->type) {
4827         case AMDGPU_RING_TYPE_GFX:
4828                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4829                 break;
4830         case AMDGPU_RING_TYPE_KIQ:
4831                 cmd = (1 << 16); /* no inc addr */
4832                 break;
4833         default:
4834                 cmd = WR_CONFIRM;
4835                 break;
4836         }
4837         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4838         amdgpu_ring_write(ring, cmd);
4839         amdgpu_ring_write(ring, reg);
4840         amdgpu_ring_write(ring, 0);
4841         amdgpu_ring_write(ring, val);
4842 }
4843
4844 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4845                                         uint32_t val, uint32_t mask)
4846 {
4847         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4848 }
4849
4850 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4851                                                   uint32_t reg0, uint32_t reg1,
4852                                                   uint32_t ref, uint32_t mask)
4853 {
4854         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4855         struct amdgpu_device *adev = ring->adev;
4856         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4857                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
4858
4859         if (fw_version_ok)
4860                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4861                                       ref, mask, 0x20);
4862         else
4863                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4864                                                            ref, mask);
4865 }
4866
4867 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4868 {
4869         struct amdgpu_device *adev = ring->adev;
4870         uint32_t value = 0;
4871
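        /* ask the SQ to kill only the waves that belong to the hung VMID */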
4872         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4873         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4874         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4875         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4876         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
4877 }
4878
4879 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4880                                                  enum amdgpu_interrupt_state state)
4881 {
4882         switch (state) {
4883         case AMDGPU_IRQ_STATE_DISABLE:
4884         case AMDGPU_IRQ_STATE_ENABLE:
4885                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4886                                TIME_STAMP_INT_ENABLE,
4887                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4888                 break;
4889         default:
4890                 break;
4891         }
4892 }
4893
4894 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4895                                                      int me, int pipe,
4896                                                      enum amdgpu_interrupt_state state)
4897 {
4898         u32 mec_int_cntl, mec_int_cntl_reg;
4899
4900         /*
4901          * amdgpu controls only the first MEC. That's why this function only
4902          * handles the setting of interrupts for this specific MEC. All other
4903          * pipes' interrupts are set by amdkfd.
4904          */
4905
4906         if (me == 1) {
4907                 switch (pipe) {
4908                 case 0:
4909                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4910                         break;
4911                 case 1:
4912                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4913                         break;
4914                 case 2:
4915                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4916                         break;
4917                 case 3:
4918                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4919                         break;
4920                 default:
4921                         DRM_DEBUG("invalid pipe %d\n", pipe);
4922                         return;
4923                 }
4924         } else {
4925                 DRM_DEBUG("invalid me %d\n", me);
4926                 return;
4927         }
4928
4929         switch (state) {
4930         case AMDGPU_IRQ_STATE_DISABLE:
4931                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4932                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4933                                              TIME_STAMP_INT_ENABLE, 0);
4934                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4935                 break;
4936         case AMDGPU_IRQ_STATE_ENABLE:
4937                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4938                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4939                                              TIME_STAMP_INT_ENABLE, 1);
4940                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4941                 break;
4942         default:
4943                 break;
4944         }
4945 }
4946
4947 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4948                                              struct amdgpu_irq_src *source,
4949                                              unsigned type,
4950                                              enum amdgpu_interrupt_state state)
4951 {
4952         switch (state) {
4953         case AMDGPU_IRQ_STATE_DISABLE:
4954         case AMDGPU_IRQ_STATE_ENABLE:
4955                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4956                                PRIV_REG_INT_ENABLE,
4957                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4958                 break;
4959         default:
4960                 break;
4961         }
4962
4963         return 0;
4964 }
4965
4966 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4967                                               struct amdgpu_irq_src *source,
4968                                               unsigned type,
4969                                               enum amdgpu_interrupt_state state)
4970 {
4971         switch (state) {
4972         case AMDGPU_IRQ_STATE_DISABLE:
4973         case AMDGPU_IRQ_STATE_ENABLE:
4974                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4975                                PRIV_INSTR_INT_ENABLE,
4976                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
4977         default:
4978                 break;
4979         }
4980
4981         return 0;
4982 }
4983
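/* Helper macros: set or clear the CP_ECC_ERROR_INT_ENABLE field in the
 * CP_ME<me>_PIPE<pipe>_INT_CNTL register selected by token pasting.
 */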
4984 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
4985         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4986                         CP_ECC_ERROR_INT_ENABLE, 1)
4987
4988 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
4989         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4990                         CP_ECC_ERROR_INT_ENABLE, 0)
4991
4992 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4993                                               struct amdgpu_irq_src *source,
4994                                               unsigned type,
4995                                               enum amdgpu_interrupt_state state)
4996 {
4997         switch (state) {
4998         case AMDGPU_IRQ_STATE_DISABLE:
4999                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5000                                 CP_ECC_ERROR_INT_ENABLE, 0);
5001                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5002                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5003                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5004                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5005                 break;
5006
5007         case AMDGPU_IRQ_STATE_ENABLE:
5008                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5009                                 CP_ECC_ERROR_INT_ENABLE, 1);
5010                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5011                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5012                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5013                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5014                 break;
5015         default:
5016                 break;
5017         }
5018
5019         return 0;
5020 }
5021
5023 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5024                                             struct amdgpu_irq_src *src,
5025                                             unsigned type,
5026                                             enum amdgpu_interrupt_state state)
5027 {
5028         switch (type) {
5029         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5030                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5031                 break;
5032         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5033                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5034                 break;
5035         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5036                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5037                 break;
5038         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5039                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5040                 break;
5041         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5042                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5043                 break;
5044         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5045                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5046                 break;
5047         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5048                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5049                 break;
5050         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5051                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5052                 break;
5053         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5054                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5055                 break;
5056         default:
5057                 break;
5058         }
5059         return 0;
5060 }
5061
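/* End-of-pipe interrupt handler: decode the source ring from the IV entry
 * (ring_id bits [3:2] = ME, bits [1:0] = pipe, bits [6:4] = queue) and run
 * fence processing on the matching gfx or compute ring.
 */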
5062 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5063                             struct amdgpu_irq_src *source,
5064                             struct amdgpu_iv_entry *entry)
5065 {
5066         int i;
5067         u8 me_id, pipe_id, queue_id;
5068         struct amdgpu_ring *ring;
5069
5070         DRM_DEBUG("IH: CP EOP\n");
5071         me_id = (entry->ring_id & 0x0c) >> 2;
5072         pipe_id = (entry->ring_id & 0x03) >> 0;
5073         queue_id = (entry->ring_id & 0x70) >> 4;
5074
5075         switch (me_id) {
5076         case 0:
5077                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5078                 break;
5079         case 1:
5080         case 2:
5081                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5082                         ring = &adev->gfx.compute_ring[i];
5083                         /* Per-queue interrupt is supported for MEC starting from VI.
5084                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5085                          */
5086                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5087                                 amdgpu_fence_process(ring);
5088                 }
5089                 break;
5090         }
5091         return 0;
5092 }
5093
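/* Common fault helper for privileged register/instruction errors: notify the
 * DRM scheduler of the ring that matches the faulting ME/pipe/queue.
 */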
5094 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5095                            struct amdgpu_iv_entry *entry)
5096 {
5097         u8 me_id, pipe_id, queue_id;
5098         struct amdgpu_ring *ring;
5099         int i;
5100
5101         me_id = (entry->ring_id & 0x0c) >> 2;
5102         pipe_id = (entry->ring_id & 0x03) >> 0;
5103         queue_id = (entry->ring_id & 0x70) >> 4;
5104
5105         switch (me_id) {
5106         case 0:
5107                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5108                 break;
5109         case 1:
5110         case 2:
5111                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5112                         ring = &adev->gfx.compute_ring[i];
5113                         if (ring->me == me_id && ring->pipe == pipe_id &&
5114                             ring->queue == queue_id)
5115                                 drm_sched_fault(&ring->sched);
5116                 }
5117                 break;
5118         }
5119 }
5120
5121 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5122                                  struct amdgpu_irq_src *source,
5123                                  struct amdgpu_iv_entry *entry)
5124 {
5125         DRM_ERROR("Illegal register access in command stream\n");
5126         gfx_v9_0_fault(adev, entry);
5127         return 0;
5128 }
5129
5130 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5131                                   struct amdgpu_irq_src *source,
5132                                   struct amdgpu_iv_entry *entry)
5133 {
5134         DRM_ERROR("Illegal instruction in command stream\n");
5135         gfx_v9_0_fault(adev, entry);
5136         return 0;
5137 }
5138
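/* RAS error callback: flag the SRAM ECC error to KFD, request a GPU reset
 * through the RAS core and report the error as uncorrectable (UE).
 */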
5139 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5140                 struct amdgpu_iv_entry *entry)
5141 {
5142         /* TODO: an uncorrectable error (UE) will trigger an interrupt. */
5143         kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5144         amdgpu_ras_reset_gpu(adev, 0);
5145         return AMDGPU_RAS_UE;
5146 }
5147
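/* Forward a CP ECC error interrupt to the RAS framework; ignored when no gfx
 * RAS block has been registered (ras_if is NULL).
 */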
5148 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5149                                   struct amdgpu_irq_src *source,
5150                                   struct amdgpu_iv_entry *entry)
5151 {
5152         struct ras_common_if *ras_if = adev->gfx.ras_if;
5153         struct ras_dispatch_if ih_data = {
5154                 .entry = entry,
5155         };
5156
5157         if (!ras_if)
5158                 return 0;
5159
5160         ih_data.head = *ras_if;
5161
5162         DRM_ERROR("CP ECC ERROR IRQ\n");
5163         amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5164         return 0;
5165 }
5166
5167 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
5168         .name = "gfx_v9_0",
5169         .early_init = gfx_v9_0_early_init,
5170         .late_init = gfx_v9_0_late_init,
5171         .sw_init = gfx_v9_0_sw_init,
5172         .sw_fini = gfx_v9_0_sw_fini,
5173         .hw_init = gfx_v9_0_hw_init,
5174         .hw_fini = gfx_v9_0_hw_fini,
5175         .suspend = gfx_v9_0_suspend,
5176         .resume = gfx_v9_0_resume,
5177         .is_idle = gfx_v9_0_is_idle,
5178         .wait_for_idle = gfx_v9_0_wait_for_idle,
5179         .soft_reset = gfx_v9_0_soft_reset,
5180         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
5181         .set_powergating_state = gfx_v9_0_set_powergating_state,
5182         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
5183 };
5184
5185 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5186         .type = AMDGPU_RING_TYPE_GFX,
5187         .align_mask = 0xff,
5188         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5189         .support_64bit_ptrs = true,
5190         .vmhub = AMDGPU_GFXHUB,
5191         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5192         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5193         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
5194         .emit_frame_size = /* totally 242 maximum if 16 IBs */
5195                 5 +  /* COND_EXEC */
5196                 7 +  /* PIPELINE_SYNC */
5197                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5198                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5199                 2 + /* VM_FLUSH */
5200                 8 +  /* FENCE for VM_FLUSH */
5201                 20 + /* GDS switch */
5202                 4 + /* double SWITCH_BUFFER,
5203                        the first COND_EXEC jumps to the place just
5204                        prior to this double SWITCH_BUFFER  */
5205                 5 + /* COND_EXEC */
5206                 7 + /* HDP_flush */
5207                 4 + /* VGT_flush */
5208                 14 + /* CE_META */
5209                 31 + /* DE_META */
5210                 3 + /* CNTX_CTRL */
5211                 5 + /* HDP_INVL */
5212                 8 + 8 + /* FENCE x2 */
5213                 2, /* SWITCH_BUFFER */
5214         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
5215         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5216         .emit_fence = gfx_v9_0_ring_emit_fence,
5217         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5218         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5219         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5220         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5221         .test_ring = gfx_v9_0_ring_test_ring,
5222         .test_ib = gfx_v9_0_ring_test_ib,
5223         .insert_nop = amdgpu_ring_insert_nop,
5224         .pad_ib = amdgpu_ring_generic_pad_ib,
5225         .emit_switch_buffer = gfx_v9_ring_emit_sb,
5226         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
5227         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5228         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
5229         .emit_tmz = gfx_v9_0_ring_emit_tmz,
5230         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5231         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5232         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5233         .soft_recovery = gfx_v9_0_ring_soft_recovery,
5234 };
5235
5236 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5237         .type = AMDGPU_RING_TYPE_COMPUTE,
5238         .align_mask = 0xff,
5239         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5240         .support_64bit_ptrs = true,
5241         .vmhub = AMDGPU_GFXHUB,
5242         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5243         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5244         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5245         .emit_frame_size =
5246                 20 + /* gfx_v9_0_ring_emit_gds_switch */
5247                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5248                 5 + /* hdp invalidate */
5249                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5250                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5251                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5252                 2 + /* gfx_v9_0_ring_emit_vm_flush */
5253                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
5254         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5255         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
5256         .emit_fence = gfx_v9_0_ring_emit_fence,
5257         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5258         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5259         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5260         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5261         .test_ring = gfx_v9_0_ring_test_ring,
5262         .test_ib = gfx_v9_0_ring_test_ib,
5263         .insert_nop = amdgpu_ring_insert_nop,
5264         .pad_ib = amdgpu_ring_generic_pad_ib,
5265         .set_priority = gfx_v9_0_ring_set_priority_compute,
5266         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5267         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5268         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5269 };
5270
5271 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5272         .type = AMDGPU_RING_TYPE_KIQ,
5273         .align_mask = 0xff,
5274         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5275         .support_64bit_ptrs = true,
5276         .vmhub = AMDGPU_GFXHUB,
5277         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5278         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5279         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5280         .emit_frame_size =
5281                 20 + /* gfx_v9_0_ring_emit_gds_switch */
5282                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5283                 5 + /* hdp invalidate */
5284                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5285                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5286                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5287                 2 + /* gfx_v9_0_ring_emit_vm_flush */
5288                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5289         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5290         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
5291         .test_ring = gfx_v9_0_ring_test_ring,
5292         .insert_nop = amdgpu_ring_insert_nop,
5293         .pad_ib = amdgpu_ring_generic_pad_ib,
5294         .emit_rreg = gfx_v9_0_ring_emit_rreg,
5295         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5296         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5297         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5298 };
5299
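/* Hook up the ring callbacks: the single KIQ ring plus all configured gfx and
 * compute rings use the function tables defined above.
 */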
5300 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5301 {
5302         int i;
5303
5304         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5305
5306         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5307                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5308
5309         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5310                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5311 }
5312
5313 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5314         .set = gfx_v9_0_set_eop_interrupt_state,
5315         .process = gfx_v9_0_eop_irq,
5316 };
5317
5318 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5319         .set = gfx_v9_0_set_priv_reg_fault_state,
5320         .process = gfx_v9_0_priv_reg_irq,
5321 };
5322
5323 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5324         .set = gfx_v9_0_set_priv_inst_fault_state,
5325         .process = gfx_v9_0_priv_inst_irq,
5326 };
5327
5328 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5329         .set = gfx_v9_0_set_cp_ecc_error_state,
5330         .process = gfx_v9_0_cp_ecc_error_irq,
5331 };
5332
5334 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5335 {
5336         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5337         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5338
5339         adev->gfx.priv_reg_irq.num_types = 1;
5340         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5341
5342         adev->gfx.priv_inst_irq.num_types = 1;
5343         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
5344
5345         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
5346         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
5347 }
5348
5349 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5350 {
5351         switch (adev->asic_type) {
5352         case CHIP_VEGA10:
5353         case CHIP_VEGA12:
5354         case CHIP_VEGA20:
5355         case CHIP_RAVEN:
5356                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5357                 break;
5358         default:
5359                 break;
5360         }
5361 }
5362
5363 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5364 {
5365         /* init asic gds info */
5366         switch (adev->asic_type) {
5367         case CHIP_VEGA10:
5368         case CHIP_VEGA12:
5369         case CHIP_VEGA20:
5370                 adev->gds.gds_size = 0x10000;
5371                 break;
5372         case CHIP_RAVEN:
5373                 adev->gds.gds_size = 0x1000;
5374                 break;
5375         default:
5376                 adev->gds.gds_size = 0x10000;
5377                 break;
5378         }
5379
5380         switch (adev->asic_type) {
5381         case CHIP_VEGA10:
5382         case CHIP_VEGA20:
5383                 adev->gds.gds_compute_max_wave_id = 0x7ff;
5384                 break;
5385         case CHIP_VEGA12:
5386                 adev->gds.gds_compute_max_wave_id = 0x27f;
5387                 break;
5388         case CHIP_RAVEN:
5389                 if (adev->rev_id >= 0x8)
5390                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5391                 else
5392                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5393                 break;
5394         default:
5395                 /* this really depends on the chip */
5396                 adev->gds.gds_compute_max_wave_id = 0x7ff;
5397                 break;
5398         }
5399
5400         adev->gds.gws_size = 64;
5401         adev->gds.oa_size = 16;
5402 }
5403
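/* Program the user-requested inactive-CU mask for the currently selected
 * shader array into GC_USER_SHADER_ARRAY_CONFIG.
 */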
5404 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5405                                                  u32 bitmap)
5406 {
5407         u32 data;
5408
5409         if (!bitmap)
5410                 return;
5411
5412         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5413         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5414
5415         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5416 }
5417
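/* Combine the hardware and user INACTIVE_CUS fields and invert the result
 * against the per-SH CU mask to obtain the bitmap of active CUs.
 */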
5418 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5419 {
5420         u32 data, mask;
5421
5422         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5423         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
5424
5425         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5426         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5427
5428         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
5429
5430         return (~data) & mask;
5431 }
5432
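/* Walk all shader engines/arrays, apply any user CU disable masks, and fill
 * cu_info with per-SH active and always-on (AO) CU bitmaps and totals.
 */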
5433 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5434                                  struct amdgpu_cu_info *cu_info)
5435 {
5436         int i, j, k, counter, active_cu_number = 0;
5437         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5438         unsigned disable_masks[4 * 2];
5439
5440         if (!adev || !cu_info)
5441                 return -EINVAL;
5442
5443         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5444
5445         mutex_lock(&adev->grbm_idx_mutex);
5446         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5447                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5448                         mask = 1;
5449                         ao_bitmap = 0;
5450                         counter = 0;
5451                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
5452                         if (i < 4 && j < 2)
5453                                 gfx_v9_0_set_user_cu_inactive_bitmap(
5454                                         adev, disable_masks[i * 2 + j]);
5455                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5456                         cu_info->bitmap[i][j] = bitmap;
5457
5458                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5459                                 if (bitmap & mask) {
5460                                         if (counter < adev->gfx.config.max_cu_per_sh)
5461                                                 ao_bitmap |= mask;
5462                                         counter++;
5463                                 }
5464                                 mask <<= 1;
5465                         }
5466                         active_cu_number += counter;
5467                         if (i < 2 && j < 2)
5468                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5469                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5470                 }
5471         }
5472         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5473         mutex_unlock(&adev->grbm_idx_mutex);
5474
5475         cu_info->number = active_cu_number;
5476         cu_info->ao_cu_mask = ao_cu_mask;
5477         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5478
5479         return 0;
5480 }
5481
5482 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5483 {
5484         .type = AMD_IP_BLOCK_TYPE_GFX,
5485         .major = 9,
5486         .minor = 0,
5487         .rev = 0,
5488         .funcs = &gfx_v9_0_ip_funcs,
5489 };