2 * Copyright 2016 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
31 #include "amdgpu_gfx.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
40 #include "vega10_enum.h"
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 #include "amdgpu_ras.h"
52 #include "gfx_v9_4_2.h"
54 #include "asic_reg/pwr/pwr_10_0_offset.h"
55 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
56 #include "asic_reg/gc/gc_9_0_default.h"
58 #define GFX9_NUM_GFX_RINGS 1
59 #define GFX9_MEC_HPD_SIZE 4096
60 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
61 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
63 #define mmGCEA_PROBE_MAP 0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX 0
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
113 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
114 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
119 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
120 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
121 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
126 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
127 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
130 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03
131 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
132 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04
133 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0
134 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09
135 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0
136 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a
137 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0
138 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b
139 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0
140 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
141 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0
143 enum ta_ras_gfx_subblock {
145 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
146 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
147 TA_RAS_BLOCK__GFX_CPC_UCODE,
148 TA_RAS_BLOCK__GFX_DC_STATE_ME1,
149 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
150 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
151 TA_RAS_BLOCK__GFX_DC_STATE_ME2,
152 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
153 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
154 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
156 TA_RAS_BLOCK__GFX_CPF_INDEX_START,
157 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
158 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
159 TA_RAS_BLOCK__GFX_CPF_TAG,
160 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
162 TA_RAS_BLOCK__GFX_CPG_INDEX_START,
163 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
164 TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
165 TA_RAS_BLOCK__GFX_CPG_TAG,
166 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
168 TA_RAS_BLOCK__GFX_GDS_INDEX_START,
169 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
170 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
171 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
172 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
173 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
174 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
176 TA_RAS_BLOCK__GFX_SPI_SR_MEM,
178 TA_RAS_BLOCK__GFX_SQ_INDEX_START,
179 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
180 TA_RAS_BLOCK__GFX_SQ_LDS_D,
181 TA_RAS_BLOCK__GFX_SQ_LDS_I,
182 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
183 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
185 TA_RAS_BLOCK__GFX_SQC_INDEX_START,
187 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
188 TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
189 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
190 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
191 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
192 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
193 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
194 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
195 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
196 TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
197 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
199 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
200 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
201 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
202 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
203 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
204 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
205 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
206 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
207 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
208 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
209 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
210 TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
211 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
213 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
214 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
215 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
216 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
217 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
218 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
219 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
220 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
221 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
222 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
223 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
224 TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
225 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
226 TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
228 TA_RAS_BLOCK__GFX_TA_INDEX_START,
229 TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
230 TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
231 TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
232 TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
233 TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
234 TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
236 TA_RAS_BLOCK__GFX_TCA_INDEX_START,
237 TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
238 TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
239 TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
240 /* TCC (5 sub-ranges)*/
241 TA_RAS_BLOCK__GFX_TCC_INDEX_START,
243 TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
244 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
245 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
246 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
247 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
248 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
249 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
250 TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
251 TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
252 TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
254 TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
255 TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
256 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
257 TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
258 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
260 TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
261 TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
262 TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
263 TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
264 TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
265 TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
266 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
267 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
268 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
269 TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
270 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
272 TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
273 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
274 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
275 TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
276 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
278 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
279 TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
280 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
281 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
282 TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
283 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
284 TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
286 TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
288 TA_RAS_BLOCK__GFX_TCP_INDEX_START,
289 TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
290 TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
291 TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
292 TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
293 TA_RAS_BLOCK__GFX_TCP_DB_RAM,
294 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
295 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
296 TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
298 TA_RAS_BLOCK__GFX_TD_INDEX_START,
299 TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
300 TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
301 TA_RAS_BLOCK__GFX_TD_CS_FIFO,
302 TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
303 /* EA (3 sub-ranges)*/
304 TA_RAS_BLOCK__GFX_EA_INDEX_START,
306 TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
307 TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
308 TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
309 TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
310 TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
311 TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
312 TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
313 TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
314 TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
315 TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
317 TA_RAS_BLOCK__GFX_EA_INDEX1_START,
318 TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
319 TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
320 TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
321 TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
322 TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
323 TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
324 TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
325 TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
327 TA_RAS_BLOCK__GFX_EA_INDEX2_START,
328 TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
329 TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
330 TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
331 TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
332 TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
333 TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
335 TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
337 TA_RAS_BLOCK__UTC_VML2_WALKER,
338 /* UTC ATC L2 2MB cache*/
339 TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
340 /* UTC ATC L2 4KB cache*/
341 TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
342 TA_RAS_BLOCK__GFX_MAX
345 struct ras_gfx_subblock {
348 int hw_supported_error_type;
349 int sw_supported_error_type;
352 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
353 [AMDGPU_RAS_BLOCK__##subblock] = { \
355 TA_RAS_BLOCK__##subblock, \
356 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
357 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
360 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
361 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
362 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
363 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
364 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
365 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
366 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
367 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
368 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
369 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
370 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
371 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
372 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
373 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
374 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
375 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
376 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
377 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
379 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
381 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
382 AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
383 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
384 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
385 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
386 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
387 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
388 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
390 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
392 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
394 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
396 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
398 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
400 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
402 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
404 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
406 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
408 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
410 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
412 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
414 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
416 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
418 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
420 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
422 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
424 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
426 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
428 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
430 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
432 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
434 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
436 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
437 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
438 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
439 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
440 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
441 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
442 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
443 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
444 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
446 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
448 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
450 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
452 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
454 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
455 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
456 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
457 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
458 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
459 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
460 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
461 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
462 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
463 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
464 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
465 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
467 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
468 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
470 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
472 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
474 AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
475 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
476 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
477 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
478 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
479 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
480 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
481 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
482 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
483 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
484 AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
485 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
486 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
487 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
488 AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
489 AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
490 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
491 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
492 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
493 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
494 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
495 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
496 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
497 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
498 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
499 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
500 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
501 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
502 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
505 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
506 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
507 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
510 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
512 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
513 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
514 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
515 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
516 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
517 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
518 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
519 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
520 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
521 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
522 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
523 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
524 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
525 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
526 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
527 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
528 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
529 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
530 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
531 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
534 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
536 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
537 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
538 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
539 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
540 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
541 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
542 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
543 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
544 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
545 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
546 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
547 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
548 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
549 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
550 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
551 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
552 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
553 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
556 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
558 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
559 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
560 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
561 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
562 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
563 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
564 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
565 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
566 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
567 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
568 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
571 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
573 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
574 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
575 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
576 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
577 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
578 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
579 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
580 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
581 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
582 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
583 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
584 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
585 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
586 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
587 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
588 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
589 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
590 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
591 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
592 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
593 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
594 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
595 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
596 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
599 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
601 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
602 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
603 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
604 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
605 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
606 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
607 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
610 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
612 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
613 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
614 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
615 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
616 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
617 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
618 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
619 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
620 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
621 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
622 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
623 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
624 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
625 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
626 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
627 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
628 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
629 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
630 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
633 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
635 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
636 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
637 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
638 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
639 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
640 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
641 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
642 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
643 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
644 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
645 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
646 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
649 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
651 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
652 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
653 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
/* Golden settings for GC 9.2.1, applied before the Vega12-specific
 * table in gfx_v9_0_init_golden_registers().
 */
static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
/* Vega12-specific golden settings, applied on GC 9.2.1 after the
 * common golden_settings_gc_9_2_1 table.
 */
static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
/* Arcturus (GC 9.4.1) golden settings; this part skips the common
 * 9.x table in gfx_v9_0_init_golden_registers().
 */
static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
/* Registers routed through the RLCG write interface on GFX v9 —
 * presumably the set intercepted under SRIOV (cf. gfx_v9_0_rlcg_w());
 * confirm against the soc15_reg_rlcg consumers.
 */
static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
/* Register offsets of RLC_SRM_INDEX_CNTL_ADDR_0..7 relative to
 * RLC_SRM_INDEX_CNTL_ADDR_0, so entry i can be addressed as
 * base + offset[i].
 */
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
/* Register offsets of RLC_SRM_INDEX_CNTL_DATA_0..7 relative to
 * RLC_SRM_INDEX_CNTL_DATA_0 (companion table to the ADDR offsets
 * above).
 */
static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
/*
 * gfx_v9_0_rlcg_w - write a GC register through the RLCG scratch-register
 * mailbox (SRIOV full-access path).
 *
 * GRBM_GFX_CNTL / GRBM_GFX_INDEX writes are mirrored into scratch_reg2 /
 * scratch_reg3 and then also written directly via MMIO.  Any other
 * register is handed to the RLC: the value goes into scratch_reg0, the
 * offset (with bit 31 set as a "request pending" flag) into scratch_reg1,
 * the RLC spare interrupt is rung, and we poll scratch_reg1 until the
 * RLC clears bit 31.
 */
static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
	/* MMIO addresses of the mailbox scratch registers */
	static void *scratch_reg0;
	static void *scratch_reg1;
	static void *scratch_reg2;
	static void *scratch_reg3;
	static void *spare_int;
	/* register offsets that need the direct-write treatment */
	static uint32_t grbm_cntl;
	static uint32_t grbm_idx;

	scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
	scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
	/* REG2/REG3 deliberately use SCRATCH_REG1's BASE_IDX — presumably
	 * all scratch registers live in the same base block; confirm. */
	scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
	scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
	spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;

	grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
	grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;

	if (amdgpu_sriov_runtime(adev)) {
		/* RLCG writes are not allowed while the VF is in runtime */
		pr_err("shouldn't call rlcg write register during runtime\n");

	if (offset == grbm_cntl || offset == grbm_idx) {
		/* mirror the value into the mailbox, then write directly */
		if (offset == grbm_cntl)
			writel(v, scratch_reg2);
		else if (offset == grbm_idx)
			writel(v, scratch_reg3);

		writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
		uint32_t retries = 50000;

		writel(v, scratch_reg0);
		writel(offset | 0x80000000, scratch_reg1); /* bit 31 = pending */
		writel(1, spare_int); /* kick the RLC */
		for (i = 0; i < retries; i++) {
			tmp = readl(scratch_reg1);
			if (!(tmp & 0x80000000)) /* RLC acknowledged */

		pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
/*
 * SRIOV register-write entry point for GFX v9: RLC-owned registers are
 * routed through the RLCG mailbox when the VF has full access; the
 * fall-back path is a plain MMIO write (no-KIQ when requested).
 */
static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset,
				u32 v, u32 acc_flags, u32 hwip)
	if ((acc_flags & AMDGPU_REGS_RLC) &&
	    amdgpu_sriov_fullaccess(adev)) {
		gfx_v9_0_rlcg_w(adev, offset, v, acc_flags);
		/* NOTE(review): presumably returns here so the plain-write
		 * path below does not also run — confirm in full source. */

	if (acc_flags & AMDGPU_REGS_NO_KIQ)
		WREG32_NO_KIQ(offset, v);
/* Golden GB_ADDR_CONFIG values per ASIC family. */
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
/* Forward declarations for callbacks referenced before their definitions. */
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
/*
 * Emit a PACKET3_SET_RESOURCES packet on the KIQ ring, handing the mask
 * of compute queues to the CP scheduler.  GWS, OAC and GDS resources
 * are all left at zero.
 */
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  /* vmid_mask:0* queue_type:0 (KIQ) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			  lower_32_bits(queue_mask)); /* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			  upper_32_bits(queue_mask)); /* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
/*
 * Emit a PACKET3_MAP_QUEUES packet on the KIQ ring: register a ring's
 * MQD with the CP and attach its doorbell and wptr polling address.
 */
static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				    struct amdgpu_ring *ring)
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	/* engine select: 4 = gfx, 0 = compute */
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  /* ME1/ME2 map to field values 0/1 */
			  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			  /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			  /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  /* num_queues: must be 1 */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
/*
 * Emit a PACKET3_UNMAP_QUEUES packet on the KIQ ring.  For the
 * PREEMPT_QUEUES_NO_UNMAP action the trailing dwords carry the fence
 * address/sequence the CP signals on completion; otherwise they are
 * written as zero.
 */
static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      enum amdgpu_unmap_queues_action action,
				      u64 gpu_addr, u64 seq)
	/* engine select: 4 = gfx, 0 = compute */
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
/*
 * Emit a PACKET3_QUERY_STATUS packet on the KIQ ring for the given
 * ring's doorbell; the CP writes its reply to addr and fences with seq.
 */
static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
	/* engine select: 4 = gfx, 0 = compute */
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
/*
 * Emit a PACKET3_INVALIDATE_TLBS packet for the given pasid and flush
 * type; the all_hub field requests invalidation on all hubs (per the
 * packet field name).
 */
static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
					 uint16_t pasid, uint32_t flush_type,
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
/* KIQ PM4 packet emitters for GFX v9.  The *_size fields are the dword
 * counts each emitter writes (header included), used by the KIQ ring
 * code to reserve space.
 */
static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
/* Install the GFX v9 KIQ PM4 packet emitters on the device's KIQ. */
static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
/*
 * Program the per-ASIC golden register settings, selected by GC IP
 * version, then apply the common 9.x table to everything except
 * GC 9.4.1 and GC 9.4.2.
 */
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1): /* Vega10 (per _vg10 table) */
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
	case IP_VERSION(9, 2, 1): /* Vega12 (per _vg12 table) */
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
	case IP_VERSION(9, 4, 0): /* Vega20 (per _vg20 table) */
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
	case IP_VERSION(9, 4, 1): /* Arcturus */
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0): /* Raven family */
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		/* NOTE(review): the rv1 sequence below presumably sits in an
		 * elided else branch — confirm in the full source. */
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
	case IP_VERSION(9, 3, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need common goldensetting */
	case IP_VERSION(9, 4, 2):
		gfx_v9_4_2_init_golden_registers(adev,
						 adev->smuio.funcs->get_die_id(adev));

	/* everything except 9.4.1/9.4.2 also gets the common table */
	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
/* Set up the CP scratch register pool: eight registers starting at
 * SCRATCH_REG0, all marked free initially.
 */
static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
1025 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1026 bool wc, uint32_t reg, uint32_t val)
1028 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1029 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1030 WRITE_DATA_DST_SEL(0) |
1031 (wc ? WR_CONFIRM : 0));
1032 amdgpu_ring_write(ring, reg);
1033 amdgpu_ring_write(ring, 0);
1034 amdgpu_ring_write(ring, val);
/*
 * Emit a PACKET3_WAIT_REG_MEM packet: stall until the value at addr0/1
 * (mem_space=1) or in a register (mem_space=0), masked with 'mask',
 * equals 'ref'.  The final dword is the poll interval.
 */
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			  WAIT_REG_MEM_OPERATION(opt) | /* wait */
			  WAIT_REG_MEM_FUNCTION(3) | /* equal */
			  WAIT_REG_MEM_ENGINE(eng_sel)));

	/* memory addresses must be dword aligned (presumably this check is
	 * guarded by mem_space in the elided lines — confirm) */
	BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
/*
 * Basic ring sanity test: poison a scratch register with 0xCAFEDEAD,
 * submit a SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, and
 * poll until the value lands or the usec timeout expires.
 */
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
	struct amdgpu_device *adev = ring->adev;
	r = amdgpu_gfx_scratch_get(adev, &scratch);
	WREG32(scratch, 0xCAFEDEAD); /* poison before the test */
	r = amdgpu_ring_alloc(ring, 3);
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF) /* GPU executed the write */

	if (i >= adev->usec_timeout) /* never saw the value: timed out */

	amdgpu_gfx_scratch_free(adev, scratch);
/*
 * IB test: submit a small indirect buffer whose WRITE_DATA packet
 * stores 0xDEADBEEF into a writeback slot, wait for the fence, and
 * verify the slot contents changed from the 0xCAFEDEAD poison value.
 */
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	r = amdgpu_device_wb_get(adev, &index);

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); /* poison the slot */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16,
			  AMDGPU_IB_POOL_DIRECT, &ib);

	/* WRITE_DATA to the writeback slot, with write confirmation */
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);

	r = dma_fence_wait_timeout(f, false, timeout);

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF) /* the IB executed */

	amdgpu_ib_free(adev, &ib, NULL);

	amdgpu_device_wb_free(adev, index);
/*
 * Drop references to all CP/RLC firmware images requested at init time
 * and free the RLC register-list buffer allocated in
 * gfx_v9_0_init_rlc_microcode().
 */
static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
/*
 * Parse the v2.1 extensions of the RLC firmware header: the ucode and
 * feature versions plus the in-image location/size of the three
 * save/restore lists (CNTL, GPM, SRM).  Only called when the header
 * was detected as v2.1 (see gfx_v9_0_init_rlc_microcode()).
 */
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
/*
 * Set adev->gfx.{me,mec}_fw_write_wait when the loaded CP firmware is
 * new enough (per-ASIC version/feature thresholds); also warn once if
 * the CP firmware is older than the recommended minimum.  The flags are
 * consumed elsewhere — presumably to choose write-then-wait packet
 * emission; confirm at the call sites.
 */
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
	     (adev->gfx.mec_feature_version < 46) ||
	     (adev->gfx.pfp_fw_version < 0x000000b7) ||
	     (adev->gfx.pfp_feature_version < 46)))
		DRM_WARN_ONCE("CP firmware version too old, please update!");

	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
	case IP_VERSION(9, 2, 1):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
	case IP_VERSION(9, 4, 0):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 2, 2):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		/* default/other versions: always supported */
		adev->gfx.me_fw_write_wait = true;
		adev->gfx.mec_fw_write_wait = true;
/* PCI identity of a board on which GFXOFF must be disabled; fields are
 * chip_vendor, chip_device, subsys_vendor, subsys_device and revision
 * (order per amdgpu_gfxoff_quirk_list and its use in
 * gfx_v9_0_should_disable_gfxoff()).
 */
struct amdgpu_gfxoff_quirk {
/* Boards with broken/unstable GFXOFF; matched by
 * gfx_v9_0_should_disable_gfxoff().
 */
static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
/*
 * Return whether pdev matches an entry in amdgpu_gfxoff_quirk_list
 * (vendor, device, subsystem IDs and revision all equal), i.e. GFXOFF
 * has to be disabled on this board.
 */
static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;

	/* the list is terminated by an entry with chip_device == 0 */
	while (p && p->chip_device != 0) {
		if (pdev->vendor == p->chip_vendor &&
		    pdev->device == p->chip_device &&
		    pdev->subsystem_vendor == p->subsys_vendor &&
		    pdev->subsystem_device == p->subsys_device &&
		    pdev->revision == p->revision) {
1292 static bool is_raven_kicker(struct amdgpu_device *adev)
1294 if (adev->pm.fw_version >= 0x41e2b)
1300 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1302 if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1303 (adev->gfx.me_fw_version >= 0x000000a5) &&
1304 (adev->gfx.me_feature_version >= 52))
/*
 * Decide whether GFXOFF stays enabled: clear PP_GFXOFF_MASK on quirked
 * boards and on Raven parts whose RLC firmware is too old, and turn on
 * the GFX powergating flags where GFXOFF remains allowed.
 */
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		/* on non-Raven2/Picasso parts, old or non-v2.1 RLC firmware
		 * cannot do GFXOFF safely */
		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
		    ((!is_raven_kicker(adev) &&
		      adev->gfx.rlc_fw_version < 531) ||
		     (adev->gfx.rlc_feature_version < 1) ||
		     !adev->gfx.rlc.is_rlc_v2_1))
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_RLC_SMU_HS;
	case IP_VERSION(9, 3, 0):
		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_RLC_SMU_HS;
/*
 * Request and validate the PFP, ME and CE firmware images for
 * chip_name, record their ucode/feature versions, and — when the PSP
 * front-door loader is in use — register each image and grow
 * adev->firmware.fw_size.  On failure, all images taken so far are
 * released (error path partially elided in this view).
 */
static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	/* PFP (prefetch parser) */
	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/* ME (micro engine) */
	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/* CE (constant engine) */
	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/* register the images with the PSP front-door loader */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* error path: release everything requested above */
			"gfx9: Failed to load firmware \"%s\"\n",
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
/*
 * Request, validate and parse the RLC firmware for chip_name: pick the
 * right image variant (Picasso-AM4 / Raven-kicker / default), copy the
 * register list out of the image, parse v2.1 header extensions when
 * present, and register everything with the PSP front-door loader.
 */
static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	/*
	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 * or revision >= 0xD8 && revision <= 0xDF
	 * otherwise is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
	    (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
	     ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		 (smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	/* v2.1 headers carry the save/restore list extensions */
	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	/* one buffer holds both the format list and the restore list */
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {

	/* copy the (little-endian) format array out of the image */
	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	/* the restore list lives directly after the format list */
	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* v2.1 save/restore lists, only when all three are present */
		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);

		/* error path: drop the RLC image */
			"gfx9: Failed to load firmware \"%s\"\n",
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
1557 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1559 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1560 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1561 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
/* Fetch, validate and register the CP compute (MEC/MEC2) microcode.
 * Requests <chip>_mec.bin (and <chip>_mec2.bin when supported), records the
 * firmware/feature versions, and — for PSP front-door loading — fills the
 * adev->firmware.ucode[] table, splitting each image into its main ucode and
 * jump-table (JT) portions.
 * NOTE(review): error-check/goto lines between the visible statements are not
 * shown in this view; the error path releasing both firmwares is at the tail. */
1567 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1568 const char *chip_name)
1572 struct amdgpu_firmware_info *info = NULL;
1573 const struct common_firmware_header *header = NULL;
1574 const struct gfx_firmware_header_v1_0 *cp_hdr;
/* MEC1: always required. */
1576 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1577 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1580 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1583 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1584 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1585 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* MEC2: only some ASICs ship a distinct binary. */
1588 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1589 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1590 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1592 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1595 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1596 adev->gfx.mec2_fw->data;
1597 adev->gfx.mec2_fw_version =
1598 le32_to_cpu(cp_hdr->header.ucode_version);
1599 adev->gfx.mec2_feature_version =
1600 le32_to_cpu(cp_hdr->ucode_feature_version);
1603 adev->gfx.mec2_fw = NULL;
/* No separate MEC2 binary: mirror MEC1 version info. */
1606 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1607 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
/* PSP loading: register ucode table entries and account sizes. */
1610 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1611 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1612 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1613 info->fw = adev->gfx.mec_fw;
1614 header = (const struct common_firmware_header *)info->fw->data;
1615 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
/* Main ucode size excludes the jump table (jt_size is in dwords). */
1616 adev->firmware.fw_size +=
1617 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1619 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1620 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1621 info->fw = adev->gfx.mec_fw;
1622 adev->firmware.fw_size +=
1623 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1625 if (adev->gfx.mec2_fw) {
1626 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1627 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1628 info->fw = adev->gfx.mec2_fw;
1629 header = (const struct common_firmware_header *)info->fw->data;
1630 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1631 adev->firmware.fw_size +=
1632 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1634 /* TODO: Determine if MEC2 JT FW loading can be removed
1635 for all GFX V9 asic and above */
1636 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1637 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1638 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1639 info->fw = adev->gfx.mec2_fw;
1640 adev->firmware.fw_size +=
1641 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
/* Post-load checks that depend on final firmware versions. */
1648 gfx_v9_0_check_if_need_gfxoff(adev);
1649 gfx_v9_0_check_fw_write_wait(adev);
/* Error path: drop any firmware we obtained. */
1652 "gfx9: Failed to load firmware \"%s\"\n",
1654 release_firmware(adev->gfx.mec_fw);
1655 adev->gfx.mec_fw = NULL;
1656 release_firmware(adev->gfx.mec2_fw);
1657 adev->gfx.mec2_fw = NULL;
/* Top-level microcode init: map the GC IP version to a firmware chip name,
 * then load CP-GFX (when CPG rings exist), RLC, and CP-compute microcode.
 * NOTE(review): the break statements between switch cases are outside this
 * view, as are the error checks after each init call. */
1662 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1664 const char *chip_name;
1669 switch (adev->ip_versions[GC_HWIP][0]) {
1670 case IP_VERSION(9, 0, 1):
1671 chip_name = "vega10";
1673 case IP_VERSION(9, 2, 1):
1674 chip_name = "vega12";
1676 case IP_VERSION(9, 4, 0):
1677 chip_name = "vega20";
1679 case IP_VERSION(9, 2, 2):
1680 case IP_VERSION(9, 1, 0):
/* Raven family: APU flags distinguish the three firmware variants. */
1681 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1682 chip_name = "raven2";
1683 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1684 chip_name = "picasso";
1686 chip_name = "raven";
1688 case IP_VERSION(9, 4, 1):
1689 chip_name = "arcturus";
1691 case IP_VERSION(9, 3, 0):
1692 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1693 chip_name = "renoir";
1695 chip_name = "green_sardine";
1697 case IP_VERSION(9, 4, 2):
1698 chip_name = "aldebaran";
1704 /* No CPG in Arcturus */
1705 if (adev->gfx.num_gfx_rings) {
1706 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1711 r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1715 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
/* Compute the dword size of the clear-state buffer (CSB): preamble +
 * context-control packets, one SET_CONTEXT_REG packet (2 header dwords +
 * reg_count payload) per SECT_CONTEXT extent, plus end-of-clear-state. */
1722 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1725 const struct cs_section_def *sect = NULL;
1726 const struct cs_extent_def *ext = NULL;
1728 /* begin clear state */
1730 /* context control state */
1733 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1734 for (ext = sect->section; ext->extent != NULL; ++ext) {
1735 if (sect->id == SECT_CONTEXT)
1736 count += 2 + ext->reg_count;
1742 /* end clear state */
/* Emit the clear-state buffer contents into @buffer as little-endian PM4
 * packets; layout must match the size computed by gfx_v9_0_get_csb_size(). */
1750 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1751 volatile u32 *buffer)
1754 const struct cs_section_def *sect = NULL;
1755 const struct cs_extent_def *ext = NULL;
1757 if (adev->gfx.rlc.cs_data == NULL)
/* Preamble: begin clear state. */
1762 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1763 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
/* Context control: load enable for both shadow/non-shadow state. */
1765 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1766 buffer[count++] = cpu_to_le32(0x80000000);
1767 buffer[count++] = cpu_to_le32(0x80000000);
/* One SET_CONTEXT_REG run per context-section extent. */
1769 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1770 for (ext = sect->section; ext->extent != NULL; ++ext) {
1771 if (sect->id == SECT_CONTEXT) {
1773 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1774 buffer[count++] = cpu_to_le32(ext->reg_index -
1775 PACKET3_SET_CONTEXT_REG_START);
1776 for (i = 0; i < ext->reg_count; i++)
1777 buffer[count++] = cpu_to_le32(ext->extent[i]);
/* Postamble: end clear state, then a CLEAR_STATE packet. */
1784 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1785 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1787 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1788 buffer[count++] = cpu_to_le32(0);
/* Program the "always on" CU masks used by power gating / load balancing.
 * Walks every SE/SH, builds a per-SH CU bitmap from cu_info, writes the
 * first pg_always_on_cu_num CUs to RLC_PG_ALWAYS_ON_CU_MASK and up to
 * always_on_cu_num CUs to RLC_LB_ALWAYS_ACTIVE_CU_MASK.
 * NOTE(review): the per-iteration mask/cu_bitmap/counter resets and shifts
 * sit between the visible lines and are not shown here. */
1791 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1793 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1794 uint32_t pg_always_on_cu_num = 2;
1795 uint32_t always_on_cu_num;
1797 uint32_t mask, cu_bitmap, counter;
/* AON CU budget depends on the ASIC class. */
1799 if (adev->flags & AMD_IS_APU)
1800 always_on_cu_num = 4;
1801 else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1802 always_on_cu_num = 8;
1804 always_on_cu_num = 12;
1806 mutex_lock(&adev->grbm_idx_mutex);
1807 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1808 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1812 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1814 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1815 if (cu_info->bitmap[i][j] & mask) {
1816 if (counter == pg_always_on_cu_num)
1817 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1818 if (counter < always_on_cu_num)
1827 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1828 cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
/* Restore broadcast mode before releasing the index mutex. */
1831 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1832 mutex_unlock(&adev->grbm_idx_mutex);
/* Configure Load Balance Per Watt (LBPW) for Raven-class parts: threshold,
 * counter, sampling and hysteresis registers, then the always-on CU masks. */
1835 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1839 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1840 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1841 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1842 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1843 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1845 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1846 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1848 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1849 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1851 mutex_lock(&adev->grbm_idx_mutex);
1852 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1853 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1854 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1856 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1857 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1858 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1859 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1860 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1862 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1863 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1866 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1869 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1870 * programmed in gfx_v9_0_init_always_on_cu_mask()
1873 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1874 * but used for RLC_LB_CNTL configuration */
1875 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1876 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1877 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1878 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1879 mutex_unlock(&adev->grbm_idx_mutex);
1881 gfx_v9_0_init_always_on_cu_mask(adev);
/* Vega20 (GC 9.4.0) variant of LBPW init — same register sequence as
 * gfx_v9_0_init_lbpw() but with different threshold/counter-max values. */
1884 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1888 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1889 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1890 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1891 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1892 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1894 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1895 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1897 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1898 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1900 mutex_lock(&adev->grbm_idx_mutex);
1901 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1902 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1903 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1905 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1906 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1907 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1908 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1909 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1911 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1912 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1915 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1918 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1919 * programmed in gfx_v9_0_init_always_on_cu_mask()
1922 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1923 * but used for RLC_LB_CNTL configuration */
1924 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1925 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1926 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1927 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1928 mutex_unlock(&adev->grbm_idx_mutex);
1930 gfx_v9_0_init_always_on_cu_mask(adev);
/* Toggle the RLC load-balance enable bit. */
1933 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1935 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
/* Number of CP jump tables needed in the RLC cp_table.
 * NOTE(review): the return values are outside this view — presumably more
 * tables when a separate MEC2 binary is loaded; confirm in full file. */
1938 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1940 if (gfx_v9_0_load_mec2_fw_bin_support(adev))
/* RLC software init: clear-state data, clear-state BO, APU cp_table BO,
 * per-ASIC LBPW programming, and initial SPM VMID. */
1946 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1948 const struct cs_section_def *cs_data;
1951 adev->gfx.rlc.cs_data = gfx9_cs_data;
1953 cs_data = adev->gfx.rlc.cs_data;
1956 /* init clear state block */
1957 r = amdgpu_gfx_rlc_init_csb(adev);
1962 if (adev->flags & AMD_IS_APU) {
1963 /* TODO: double check the cp_table_size for RV */
1964 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1965 r = amdgpu_gfx_rlc_init_cpt(adev);
/* LBPW is only programmed on Raven-family APUs and Vega20. */
1970 switch (adev->ip_versions[GC_HWIP][0]) {
1971 case IP_VERSION(9, 2, 2):
1972 case IP_VERSION(9, 1, 0):
1973 gfx_v9_0_init_lbpw(adev);
1975 case IP_VERSION(9, 4, 0):
1976 gfx_v9_4_init_lbpw(adev);
1982 /* init spm vmid with 0xf */
1983 if (adev->gfx.rlc.funcs->update_spm_vmid)
1984 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
/* Free the MEC buffer objects (HPD EOP buffer and MEC firmware BO). */
1989 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1991 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1992 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
/* Allocate and initialize MEC resources: acquire compute queues, create the
 * per-ring HPD EOP buffer in VRAM (zeroed), and copy the MEC ucode into a
 * GTT buffer object for the CP to fetch.
 * NOTE(review): kmap error checks and the final return are outside this view. */
1995 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1999 const __le32 *fw_data;
2002 size_t mec_hpd_size;
2004 const struct gfx_firmware_header_v1_0 *mec_hdr;
2006 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2008 /* take ownership of the relevant compute queues */
2009 amdgpu_gfx_compute_queue_acquire(adev);
2010 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
/* HPD EOP buffer: one GFX9_MEC_HPD_SIZE slot per compute ring. */
2012 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
2013 AMDGPU_GEM_DOMAIN_VRAM,
2014 &adev->gfx.mec.hpd_eop_obj,
2015 &adev->gfx.mec.hpd_eop_gpu_addr,
2018 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
2019 gfx_v9_0_mec_fini(adev);
2023 memset(hpd, 0, mec_hpd_size);
2025 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
2026 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/* Stage the MEC ucode image into a GTT BO. */
2029 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2031 fw_data = (const __le32 *)
2032 (adev->gfx.mec_fw->data +
2033 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2034 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
2036 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
2037 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2038 &adev->gfx.mec.mec_fw_obj,
2039 &adev->gfx.mec.mec_fw_gpu_addr,
2042 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
2043 gfx_v9_0_mec_fini(adev);
2047 memcpy(fw, fw_data, fw_size);
2049 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2050 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
/* Read one indexed SQ wave register: program SQ_IND_INDEX with the target
 * wave/simd/address (FORCE_READ set), then read back SQ_IND_DATA. */
2055 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
2057 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2058 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2059 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2060 (address << SQ_IND_INDEX__INDEX__SHIFT) |
2061 (SQ_IND_INDEX__FORCE_READ_MASK));
2062 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
/* Bulk-read @num consecutive SQ wave registers starting at @regno into @out,
 * using AUTO_INCR so each SQ_IND_DATA read advances the index. */
2065 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2066 uint32_t wave, uint32_t thread,
2067 uint32_t regno, uint32_t num, uint32_t *out)
2069 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2070 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2071 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2072 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
2073 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2074 (SQ_IND_INDEX__FORCE_READ_MASK) |
2075 (SQ_IND_INDEX__AUTO_INCR_MASK));
/* NOTE(review): the loop header around this read is outside this view. */
2077 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
/* Snapshot the debug state of one wave into @dst (format tag 1 followed by
 * the listed SQ_WAVE_* registers); *no_fields counts entries written. */
2080 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2082 /* type 1 wave data */
2083 dst[(*no_fields)++] = 1;
2084 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2085 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2086 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2087 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2088 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2089 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2090 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2091 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2092 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2093 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2094 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2095 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2096 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2097 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2098 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
/* Dump @size SGPRs of one wave, starting at @start, into @dst. */
2101 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2102 uint32_t wave, uint32_t start,
2103 uint32_t size, uint32_t *dst)
2106 adev, simd, wave, 0,
2107 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
/* Dump @size VGPRs of one thread of a wave, starting at @start, into dst. */
2110 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2111 uint32_t wave, uint32_t thread,
2112 uint32_t start, uint32_t size,
2116 adev, simd, wave, thread,
2117 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
/* Select a specific ME/pipe/queue/VMID via GRBM for subsequent register
 * accesses (thin wrapper over soc15_grbm_select). */
2120 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2121 u32 me, u32 pipe, u32 q, u32 vm)
2123 soc15_grbm_select(adev, me, pipe, q, vm);
/* GFX callback table exposed through adev->gfx.funcs. */
2126 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2127 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2128 .select_se_sh = &gfx_v9_0_select_se_sh,
2129 .read_wave_data = &gfx_v9_0_read_wave_data,
2130 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2131 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2132 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
/* RAS callback table for GC 9.4.0 (Vega20); installed in gpu_early_init. */
2135 static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = {
2136 .ras_late_init = amdgpu_gfx_ras_late_init,
2137 .ras_fini = amdgpu_gfx_ras_fini,
2138 .ras_error_inject = &gfx_v9_0_ras_error_inject,
2139 .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2140 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
/* Early per-ASIC GFX configuration: install callback tables, set FIFO sizes
 * and hw-context limits per GC IP revision, derive gb_addr_config (golden
 * value or read-modify-write of mmGB_ADDR_CONFIG), and decode its fields
 * into adev->gfx.config.
 * NOTE(review): break statements and error checks between cases are outside
 * this view. */
2143 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2148 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2150 switch (adev->ip_versions[GC_HWIP][0]) {
2151 case IP_VERSION(9, 0, 1):
2152 adev->gfx.config.max_hw_contexts = 8;
2153 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2154 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2155 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2156 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2157 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2159 case IP_VERSION(9, 2, 1):
2160 adev->gfx.config.max_hw_contexts = 8;
2161 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2162 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2163 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2164 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2165 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2166 DRM_INFO("fix gfx.config for vega12\n");
2168 case IP_VERSION(9, 4, 0):
2169 adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs;
2170 adev->gfx.config.max_hw_contexts = 8;
2171 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2172 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2173 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2174 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
/* Read hardware value, then force the required field settings. */
2175 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2176 gb_addr_config &= ~0xf3e777ff;
2177 gb_addr_config |= 0x22014042;
2178 /* check vbios table if gpu info is not available */
2179 err = amdgpu_atomfirmware_get_gfx_info(adev);
2183 case IP_VERSION(9, 2, 2):
2184 case IP_VERSION(9, 1, 0):
2185 adev->gfx.config.max_hw_contexts = 8;
2186 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2187 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2188 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2189 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2190 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2191 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2193 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2195 case IP_VERSION(9, 4, 1):
2196 adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs;
2197 adev->gfx.config.max_hw_contexts = 8;
2198 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2199 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2200 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2201 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2202 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2203 gb_addr_config &= ~0xf3e777ff;
2204 gb_addr_config |= 0x22014042;
2206 case IP_VERSION(9, 3, 0):
2207 adev->gfx.config.max_hw_contexts = 8;
2208 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2209 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2210 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2211 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2212 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2213 gb_addr_config &= ~0xf3e777ff;
2214 gb_addr_config |= 0x22010042;
2216 case IP_VERSION(9, 4, 2):
2217 adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs;
2218 adev->gfx.config.max_hw_contexts = 8;
2219 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2220 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2221 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2222 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2223 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2224 gb_addr_config &= ~0xf3e777ff;
2225 gb_addr_config |= 0x22014042;
2226 /* check vbios table if gpu info is not available */
2227 err = amdgpu_atomfirmware_get_gfx_info(adev);
/* Decode gb_addr_config bitfields into the cached config struct. */
2236 adev->gfx.config.gb_addr_config = gb_addr_config;
2238 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2240 adev->gfx.config.gb_addr_config,
2244 adev->gfx.config.max_tile_pipes =
2245 adev->gfx.config.gb_addr_config_fields.num_pipes;
2247 adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2249 adev->gfx.config.gb_addr_config,
2252 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2254 adev->gfx.config.gb_addr_config,
2256 MAX_COMPRESSED_FRAGS);
2257 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2259 adev->gfx.config.gb_addr_config,
2262 adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2264 adev->gfx.config.gb_addr_config,
2266 NUM_SHADER_ENGINES);
2267 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2269 adev->gfx.config.gb_addr_config,
2271 PIPE_INTERLEAVE_SIZE));
/* Initialize one compute ring: doorbell, EOP address within the shared HPD
 * buffer, IRQ routing, and hardware priority; ends in amdgpu_ring_init(). */
2276 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2277 int mec, int pipe, int queue)
2280 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2281 unsigned int hw_prio;
/* NOTE(review): redundant re-assignment — ring was already initialized to
 * this exact value at its declaration above. Candidate for cleanup. */
2283 ring = &adev->gfx.compute_ring[ring_id];
2288 ring->queue = queue;
2290 ring->ring_obj = NULL;
2291 ring->use_doorbell = true;
2292 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
/* Each ring owns one GFX9_MEC_HPD_SIZE slot of the shared EOP buffer. */
2293 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2294 + (ring_id * GFX9_MEC_HPD_SIZE);
2295 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2297 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2298 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2300 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2301 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2302 /* type-2 packets are deprecated on MEC, use type-3 instead */
2303 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
/* IP-block sw_init: set MEC topology, register interrupt sources, load
 * microcode, allocate RLC/MEC BOs, create gfx and compute rings, init KIQ
 * and MQDs, and finish with gpu_early_init.
 * NOTE(review): per-call error returns and loop bookkeeping lines are
 * outside this view. */
2307 static int gfx_v9_0_sw_init(void *handle)
2309 int i, j, k, r, ring_id;
2310 struct amdgpu_ring *ring;
2311 struct amdgpu_kiq *kiq;
2312 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2314 switch (adev->ip_versions[GC_HWIP][0]) {
2315 case IP_VERSION(9, 0, 1):
2316 case IP_VERSION(9, 2, 1):
2317 case IP_VERSION(9, 4, 0):
2318 case IP_VERSION(9, 2, 2):
2319 case IP_VERSION(9, 1, 0):
2320 case IP_VERSION(9, 4, 1):
2321 case IP_VERSION(9, 3, 0):
2322 case IP_VERSION(9, 4, 2):
2323 adev->gfx.mec.num_mec = 2;
2326 adev->gfx.mec.num_mec = 1;
2330 adev->gfx.mec.num_pipe_per_mec = 4;
2331 adev->gfx.mec.num_queue_per_pipe = 8;
/* EOP (end-of-pipe) interrupt. */
2334 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2338 /* Privileged reg */
2339 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2340 &adev->gfx.priv_reg_irq);
2344 /* Privileged inst */
2345 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2346 &adev->gfx.priv_inst_irq);
/* ECC and FUE errors share the cp_ecc_error_irq source. */
2351 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2352 &adev->gfx.cp_ecc_error_irq);
2357 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2358 &adev->gfx.cp_ecc_error_irq);
2362 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2364 gfx_v9_0_scratch_init(adev);
2366 r = gfx_v9_0_init_microcode(adev);
2368 DRM_ERROR("Failed to load gfx firmware!\n");
2372 r = adev->gfx.rlc.funcs->init(adev);
2374 DRM_ERROR("Failed to init rlc BOs!\n");
2378 r = gfx_v9_0_mec_init(adev);
2380 DRM_ERROR("Failed to init MEC BOs!\n");
2384 /* set up the gfx ring */
2385 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2386 ring = &adev->gfx.gfx_ring[i];
2387 ring->ring_obj = NULL;
2389 sprintf(ring->name, "gfx");
2391 sprintf(ring->name, "gfx_%d", i);
2392 ring->use_doorbell = true;
2393 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2394 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2395 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2396 AMDGPU_RING_PRIO_DEFAULT, NULL);
2401 /* set up the compute queues - allocate horizontally across pipes */
2403 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2404 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2405 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2406 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2409 r = gfx_v9_0_compute_ring_init(adev,
2420 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2422 DRM_ERROR("Failed to init KIQ BOs!\n");
2426 kiq = &adev->gfx.kiq;
2427 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2431 /* create MQD for all compute queues as wel as KIQ for SRIOV case */
2432 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2436 adev->gfx.ce_ram_size = 0x8000;
2438 r = gfx_v9_0_gpu_early_init(adev);
/* IP-block sw_fini: tear down RAS, rings, MQDs, KIQ, MEC BOs, clear-state
 * BO, the APU cp_table, and release microcode — reverse of sw_init. */
2446 static int gfx_v9_0_sw_fini(void *handle)
2449 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2451 if (adev->gfx.ras_funcs &&
2452 adev->gfx.ras_funcs->ras_fini)
2453 adev->gfx.ras_funcs->ras_fini(adev);
2455 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2456 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2457 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2458 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2460 amdgpu_gfx_mqd_sw_fini(adev);
2461 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2462 amdgpu_gfx_kiq_fini(adev);
2464 gfx_v9_0_mec_fini(adev);
2465 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
/* cp_table BO only exists on APUs (allocated in rlc_init). */
2466 if (adev->flags & AMD_IS_APU) {
2467 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2468 &adev->gfx.rlc.cp_table_gpu_addr,
2469 (void **)&adev->gfx.rlc.cp_table_ptr);
2471 gfx_v9_0_free_microcode(adev);
/* Tiling mode table setup — no programming needed on GFX9 (body appears
 * empty; GFX9 uses the unified addressing scheme rather than tile tables). */
2477 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
/* Steer subsequent register accesses to one SE/SH/instance via
 * GRBM_GFX_INDEX; 0xffffffff in any argument selects broadcast mode. */
2482 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2487 if (instance == 0xffffffff)
2488 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2490 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2492 if (se_num == 0xffffffff)
2493 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2495 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2497 if (sh_num == 0xffffffff)
2498 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2500 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2502 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
/* Return the bitmap of active render backends for the currently selected
 * SE/SH: OR the fuse and user disable registers, then invert under a mask
 * sized to backends-per-SH. */
2505 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2509 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2510 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2512 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2513 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2515 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2516 adev->gfx.config.max_sh_per_se);
/* Registers hold "disabled" bits; invert to get the active set. */
2518 return (~data) & mask;
/* Aggregate per-SE/SH render-backend bitmaps into a single active-RB mask
 * and count, cached in adev->gfx.config. */
2521 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2526 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2527 adev->gfx.config.max_sh_per_se;
2529 mutex_lock(&adev->grbm_idx_mutex);
2530 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2531 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2532 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2533 data = gfx_v9_0_get_rb_active_bitmap(adev);
/* Pack each SH's bitmap at its own offset in the global mask. */
2534 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2535 rb_bitmap_width_per_sh);
2538 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2539 mutex_unlock(&adev->grbm_idx_mutex);
2541 adev->gfx.config.backend_enable_mask = active_rbs;
2542 adev->gfx.config.num_rbs = hweight32(active_rbs);
/* Aperture base used for all KFD compute VMIDs (see layout comment below). */
2545 #define DEFAULT_SH_MEM_BASES (0x6000)
/* Program SH_MEM_CONFIG/SH_MEM_BASES for every KFD compute VMID, then zero
 * their GDS/GWS/OA allocations (firmware re-enables them per target VMID). */
2546 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2549 uint32_t sh_mem_config;
2550 uint32_t sh_mem_bases;
2553 * Configure apertures:
2554 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
2555 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
2556 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
2558 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2560 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2561 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2562 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2564 mutex_lock(&adev->srbm_mutex);
2565 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2566 soc15_grbm_select(adev, 0, 0, 0, i);
2567 /* CP and shaders */
2568 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2569 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
/* Restore VMID 0 selection before dropping the SRBM mutex. */
2571 soc15_grbm_select(adev, 0, 0, 0, 0);
2572 mutex_unlock(&adev->srbm_mutex);
2574 /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2575 acccess. These should be enabled by FW for target VMIDs. */
2576 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2577 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2578 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2579 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2580 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
/* Zero GDS/GWS/OA allocations for all VMIDs except 0 (kept for HWS
 * save/restore); firmware or the driver re-enables per VMID as needed. */
2584 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2589 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2590 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2591 * the driver can enable them for graphics. VMID0 should maintain
2592 * access so that HWS firmware can save/restore entries.
2594 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2595 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2596 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2597 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2598 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
/* Per-ASIC SQ_CONFIG tweaks; currently only Arcturus (GC 9.4.1), which
 * disables the barrier waitcnt. Other revisions are left untouched. */
2602 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2606 switch (adev->ip_versions[GC_HWIP][0]) {
2607 case IP_VERSION(9, 4, 1):
2608 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2609 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2610 DISABLE_BARRIER_WAITCNT, 1);
2611 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
/* Program GRBM timeout, RB setup, CU info, and per-VMID SH_MEM apertures,
 * then initialize compute/GDS VMIDs and SQ config.
 * NOTE(review): the VMID==0 vs VMID!=0 branch around the two SH_MEM_CONFIG
 * sequences below has its if/else lines outside this view. */
2618 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2623 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2625 gfx_v9_0_tiling_mode_table_init(adev);
2627 gfx_v9_0_setup_rb(adev);
2628 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2629 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2631 /* XXX SH_MEM regs */
2632 /* where to put LDS, scratch, GPUVM in FSA64 space */
2633 mutex_lock(&adev->srbm_mutex);
2634 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2635 soc15_grbm_select(adev, 0, 0, 0, i);
2636 /* CP and shaders */
2638 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2639 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2640 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2641 !!adev->gmc.noretry);
2642 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2643 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2645 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2646 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2647 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2648 !!adev->gmc.noretry);
2649 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2650 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2651 (adev->gmc.private_aperture_start >> 48));
2652 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2653 (adev->gmc.shared_aperture_start >> 48));
2654 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2657 soc15_grbm_select(adev, 0, 0, 0, 0);
2659 mutex_unlock(&adev->srbm_mutex);
2661 gfx_v9_0_init_compute_vmid(adev);
2662 gfx_v9_0_init_gds_vmid(adev);
2663 gfx_v9_0_init_sq_config(adev);
/* Busy-wait until RLC serdes masters are idle: first the per-CU masters on
 * every SE/SH (with timeout warning), then the non-CU masters in broadcast
 * mode. Polls up to adev->usec_timeout iterations per register. */
2666 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2671 mutex_lock(&adev->grbm_idx_mutex);
2672 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2673 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2674 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2675 for (k = 0; k < adev->usec_timeout; k++) {
2676 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2680 if (k == adev->usec_timeout) {
/* Timed out: restore broadcast and bail with a diagnostic. */
2681 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2682 0xffffffff, 0xffffffff);
2683 mutex_unlock(&adev->grbm_idx_mutex);
2684 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2690 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2691 mutex_unlock(&adev->grbm_idx_mutex);
2693 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2694 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2695 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2696 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2697 for (k = 0; k < adev->usec_timeout; k++) {
2698 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/*
 * Enable/disable the CP ring-0 context/compute busy-idle interrupts.
 * The GFX idle interrupt is only touched when gfx rings exist
 * (compute-only parts have num_gfx_rings == 0).
 */
2704 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2709 /* These interrupts should be enabled to drive DS clock */
2711 tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2713 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2714 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2715 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2716 if(adev->gfx.num_gfx_rings)
2717 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2719 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
/*
 * Program the clear-state indirect buffer (CSIB): fill the CPU-side CSB
 * via the RLC get_csb_buffer callback, then point the RLC at its GPU
 * address (hi/lo, low bits masked to dword alignment) and size.
 */
2722 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2724 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2726 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2727 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2728 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2729 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2730 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2731 adev->gfx.rlc.clear_state_size);
/*
 * Walk the RLC register-list-format blob starting at @indirect_offset and
 * record (a) the start offset of each indirect register group into
 * @indirect_start_offsets and (b) the set of unique indirect register
 * offsets into @unique_indirect_regs.  Groups are terminated by the
 * 0xFFFFFFFF sentinel, entries are (offset, value) pairs, hence the +2
 * stride.  BUG_ON fires if more unique regs are found than the caller's
 * array can hold.
 *
 * NOTE(review): gaps in the embedded numbering indicate dropped lines
 * (loop break, closing braces) — confirm against upstream before editing.
 */
2734 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2735 int indirect_offset,
2737 int *unique_indirect_regs,
2738 int unique_indirect_reg_count,
2739 int *indirect_start_offsets,
2740 int *indirect_start_offsets_count,
2741 int max_start_offsets_count)
2745 for (; indirect_offset < list_size; indirect_offset++) {
2746 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2747 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2748 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2750 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2751 indirect_offset += 2;
2753 /* look for the matching indice */
2754 for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2755 if (unique_indirect_regs[idx] ==
2756 register_list_format[indirect_offset] ||
2757 !unique_indirect_regs[idx])
2761 BUG_ON(idx >= unique_indirect_reg_count);
2763 if (!unique_indirect_regs[idx])
2764 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2771 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2773 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2774 int unique_indirect_reg_count = 0;
2776 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2777 int indirect_start_offsets_count = 0;
2783 u32 *register_list_format =
2784 kmemdup(adev->gfx.rlc.register_list_format,
2785 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2786 if (!register_list_format)
2789 /* setup unique_indirect_regs array and indirect_start_offsets array */
2790 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2791 gfx_v9_1_parse_ind_reg_list(register_list_format,
2792 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2793 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2794 unique_indirect_regs,
2795 unique_indirect_reg_count,
2796 indirect_start_offsets,
2797 &indirect_start_offsets_count,
2798 ARRAY_SIZE(indirect_start_offsets));
2800 /* enable auto inc in case it is disabled */
2801 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2802 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2803 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2805 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2806 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2807 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2808 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2809 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2810 adev->gfx.rlc.register_restore[i]);
2812 /* load indirect register */
2813 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2814 adev->gfx.rlc.reg_list_format_start);
2816 /* direct register portion */
2817 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2818 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2819 register_list_format[i]);
2821 /* indirect register portion */
2822 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2823 if (register_list_format[i] == 0xFFFFFFFF) {
2824 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2828 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2829 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2831 for (j = 0; j < unique_indirect_reg_count; j++) {
2832 if (register_list_format[i] == unique_indirect_regs[j]) {
2833 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2838 BUG_ON(j >= unique_indirect_reg_count);
2843 /* set save/restore list size */
2844 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2845 list_size = list_size >> 1;
2846 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2847 adev->gfx.rlc.reg_restore_list_size);
2848 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2850 /* write the starting offsets to RLC scratch ram */
2851 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2852 adev->gfx.rlc.starting_offsets_start);
2853 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2854 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2855 indirect_start_offsets[i]);
2857 /* load unique indirect regs*/
2858 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2859 if (unique_indirect_regs[i] != 0) {
2860 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2861 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2862 unique_indirect_regs[i] & 0x3FFFF);
2864 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2865 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2866 unique_indirect_regs[i] >> 20);
2870 kfree(register_list_format);
/* Turn on the RLC save/restore machine (SRM). */
2874 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2876 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
/*
 * Hand CGPG control to GFXIP: set PWR_GFX_RLC_CGPG_EN, force the GFXOFF
 * status field to 2, then clear the enable bit again.  Each write is
 * skipped when the register already holds the target value
 * (default_data comparison).
 */
2879 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2883 uint32_t default_data = 0;
2885 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2887 /* enable GFXIP control over CGPG */
2888 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2889 if(default_data != data)
2890 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
/* force PWR_GFXOFF_STATUS = 2 */
2893 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2894 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2895 if(default_data != data)
2896 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2898 /* restore GFXIP control over GCPG */
2899 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2900 if(default_data != data)
2901 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
/*
 * One-time GFX power-gating setup, only when any of PG/SMG/DMG power
 * gating is supported: programs CP idle poll count, the RLC PG delay
 * registers (1/2/3), the GRBM register-save idle threshold, and — except
 * on GC 9.3.0 — hands CGPG control to GFXIP.
 */
2905 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2909 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2910 AMD_PG_SUPPORT_GFX_SMG |
2911 AMD_PG_SUPPORT_GFX_DMG)) {
2912 /* init IDLE_POLL_COUNT = 60 */
2913 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2914 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2915 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2916 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2918 /* init RLC PG Delay */
2920 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2921 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2922 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2923 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2924 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2926 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2927 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2928 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2929 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2931 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2932 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2933 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2934 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2936 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2937 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2939 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2940 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2941 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2942 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2943 pwr_10_0_gfxip_control_over_cgpg(adev, true);
/*
 * Toggle RLC_PG_CNTL.SMU_CLK_SLOWDOWN_ON_PU_ENABLE; write back only
 * when the value actually changed.
 */
2947 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2951 uint32_t default_data = 0;
2953 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2954 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2955 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2957 if (default_data != data)
2958 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
/*
 * Toggle RLC_PG_CNTL.SMU_CLK_SLOWDOWN_ON_PD_ENABLE; write back only
 * when the value actually changed.
 */
2961 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2965 uint32_t default_data = 0;
2967 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2968 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2969 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2971 if(default_data != data)
2972 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
/*
 * Toggle the CP power-gating field in RLC_PG_CNTL (the field name line
 * is elided in this extraction); write back only on change.
 */
2975 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2979 uint32_t default_data = 0;
2981 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2982 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2985 if(default_data != data)
2986 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
/*
 * Toggle RLC_PG_CNTL.GFX_POWER_GATING_ENABLE; write back only on change.
 */
2989 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2992 uint32_t data, default_data;
2994 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2995 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2996 GFX_POWER_GATING_ENABLE,
2998 if(default_data != data)
2999 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
/*
 * Toggle RLC_PG_CNTL.GFX_PIPELINE_PG_ENABLE (write back only on change),
 * then read any GC register so a gated GFX block is woken up.
 */
3002 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
3005 uint32_t data, default_data;
3007 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3008 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3009 GFX_PIPELINE_PG_ENABLE,
3011 if(default_data != data)
3012 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3015 /* read any GFX register to wake up GFX */
3016 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
/*
 * Toggle RLC_PG_CNTL.STATIC_PER_CU_PG_ENABLE; write back only on change.
 */
3019 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3022 uint32_t data, default_data;
3024 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3025 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3026 STATIC_PER_CU_PG_ENABLE,
3028 if(default_data != data)
3029 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
/*
 * Toggle RLC_PG_CNTL.DYN_PER_CU_PG_ENABLE; write back only on change.
 */
3032 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3035 uint32_t data, default_data;
3037 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3038 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3039 DYN_PER_CU_PG_ENABLE,
3041 if(default_data != data)
3042 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
/*
 * Power-gating init entry point: program the CSB, set up the RLC
 * save/restore list + SRM (needed by gfxoff, RLC v2.1+; the list itself
 * is only rebuilt for GC 9.2.1 / Raven2), and when any relevant PG flag
 * is set, point the RLC at the CP jump table and run the full GFX PG
 * setup.
 */
3045 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3047 gfx_v9_0_init_csb(adev);
3050 * Rlc save restore list is workable since v2_1.
3051 * And it's needed by gfxoff feature.
3053 if (adev->gfx.rlc.is_rlc_v2_1) {
3054 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
3055 (adev->apu_flags & AMD_APU_IS_RAVEN2))
3056 gfx_v9_1_init_rlc_save_restore_list(adev);
3057 gfx_v9_0_enable_save_restore_machine(adev);
3060 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3061 AMD_PG_SUPPORT_GFX_SMG |
3062 AMD_PG_SUPPORT_GFX_DMG |
3064 AMD_PG_SUPPORT_GDS |
3065 AMD_PG_SUPPORT_RLC_SMU_HS)) {
3066 WREG32(mmRLC_JUMP_TABLE_RESTORE,
3067 adev->gfx.rlc.cp_table_gpu_addr >> 8);
3068 gfx_v9_0_init_gfx_power_gating(adev);
/*
 * Stop the RLC: halt the F32 core, mask the GUI idle interrupts and
 * wait for the SERDES masters to drain.
 */
3072 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3074 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3075 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3076 gfx_v9_0_wait_for_rlc_serdes(adev);
/*
 * Pulse GRBM_SOFT_RESET.SOFT_RESET_RLC (assert then deassert; the
 * settle delays between writes are elided in this extraction).
 */
3079 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3081 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3083 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
/*
 * Start the RLC F32 core.  On dGPUs the GUI idle interrupt is enabled
 * here; APUs (carrizo comment) defer that until after CP init.  When
 * built with AMDGPU_RLC_DEBUG_RETRY, detect the debug RLC ucode
 * (version 0x108) and reprogram its timer interval / retry gap.
 */
3087 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3089 #ifdef AMDGPU_RLC_DEBUG_RETRY
3093 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3096 /* carrizo do enable cp interrupt after cp inited */
3097 if (!(adev->flags & AMD_IS_APU)) {
3098 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3102 #ifdef AMDGPU_RLC_DEBUG_RETRY
3103 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3104 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3105 if(rlc_ucode_ver == 0x108) {
3106 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
3107 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3108 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3109 * default is 0x9C4 to create a 100us interval */
3110 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3111 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3112 * to disable the page fault retry interrupts, default is
3114 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
/*
 * Legacy (non-PSP) RLC microcode load: validate the firmware handle,
 * then stream the ucode dwords through RLC_GPM_UCODE_ADDR/DATA starting
 * at RLCG_UCODE_LOADING_START_ADDRESS, and finally write the fw version
 * into the ADDR register.  Returns -EINVAL when no RLC fw is loaded
 * (early-return line elided in this extraction).
 */
3119 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3121 const struct rlc_firmware_header_v2_0 *hdr;
3122 const __le32 *fw_data;
3123 unsigned i, fw_size;
3125 if (!adev->gfx.rlc_fw)
3128 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3129 amdgpu_ucode_print_rlc_hdr(&hdr->header);
3131 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3132 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3133 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3135 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3136 RLCG_UCODE_LOADING_START_ADDRESS);
3137 for (i = 0; i < fw_size; i++)
3138 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3139 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
/*
 * RLC resume: under SR-IOV only the CSB needs programming; otherwise
 * stop the RLC, disable CGCG/CGLS, run PG init, legacy-load the ucode
 * when not using PSP, apply the per-ASIC LBPW policy (amdgpu_lbpw
 * module parameter) and start the RLC.
 */
3144 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3148 if (amdgpu_sriov_vf(adev)) {
3149 gfx_v9_0_init_csb(adev);
3153 adev->gfx.rlc.funcs->stop(adev);
/* disable CGCG and CGLS */
3156 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3158 gfx_v9_0_init_pg(adev);
3160 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3161 /* legacy rlc firmware loading */
3162 r = gfx_v9_0_rlc_load_microcode(adev);
3167 switch (adev->ip_versions[GC_HWIP][0]) {
3168 case IP_VERSION(9, 2, 2):
3169 case IP_VERSION(9, 1, 0):
3170 if (amdgpu_lbpw == 0)
3171 gfx_v9_0_enable_lbpw(adev, false);
3173 gfx_v9_0_enable_lbpw(adev, true);
3175 case IP_VERSION(9, 4, 0):
3176 if (amdgpu_lbpw > 0)
3177 gfx_v9_0_enable_lbpw(adev, true);
3179 gfx_v9_0_enable_lbpw(adev, false);
3185 adev->gfx.rlc.funcs->start(adev);
/*
 * Enable/halt the CP graphics micro engines: clears (enable) or sets
 * (disable) the ME/PFP/CE halt bits in CP_ME_CNTL.
 */
3190 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3192 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3194 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3195 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3196 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3197 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
/*
 * Legacy (non-PSP) CP graphics microcode load: validates that ME/PFP/CE
 * firmwares are present (-EINVAL return line elided), halts the gfx CP,
 * then streams each image through its UCODE_ADDR/DATA (or RAM_WADDR/
 * RAM_DATA for ME) register pair and writes the fw version last.
 */
3201 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3203 const struct gfx_firmware_header_v1_0 *pfp_hdr;
3204 const struct gfx_firmware_header_v1_0 *ce_hdr;
3205 const struct gfx_firmware_header_v1_0 *me_hdr;
3206 const __le32 *fw_data;
3207 unsigned i, fw_size;
3209 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3212 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3213 adev->gfx.pfp_fw->data;
3214 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3215 adev->gfx.ce_fw->data;
3216 me_hdr = (const struct gfx_firmware_header_v1_0 *)
3217 adev->gfx.me_fw->data;
3219 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3220 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3221 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3223 gfx_v9_0_cp_gfx_enable(adev, false);
/* PFP */
3226 fw_data = (const __le32 *)
3227 (adev->gfx.pfp_fw->data +
3228 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3229 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3230 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3231 for (i = 0; i < fw_size; i++)
3232 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3233 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
/* CE */
3236 fw_data = (const __le32 *)
3237 (adev->gfx.ce_fw->data +
3238 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3239 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3240 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3241 for (i = 0; i < fw_size; i++)
3242 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3243 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
/* ME */
3246 fw_data = (const __le32 *)
3247 (adev->gfx.me_fw->data +
3248 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3249 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3250 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3251 for (i = 0; i < fw_size; i++)
3252 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3253 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
/*
 * Bring up the gfx ring: program max contexts / device id, un-halt the
 * CP, then submit the clear-state init sequence on ring 0 —
 * PREAMBLE begin, CONTEXT_CONTROL, the SECT_CONTEXT extents from
 * gfx9_cs_data, PREAMBLE end, CLEAR_STATE, CE partition bases and the
 * VGT index type — and commit.  Returns the amdgpu_ring_alloc() error
 * if the ring could not be locked.
 */
3258 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3260 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3261 const struct cs_section_def *sect = NULL;
3262 const struct cs_extent_def *ext = NULL;
3266 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3267 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3269 gfx_v9_0_cp_gfx_enable(adev, true);
/* csb size + 4 (preamble/clear-state) + 3 (set base) dwords */
3271 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3273 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3277 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3278 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3280 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3281 amdgpu_ring_write(ring, 0x80000000);
3282 amdgpu_ring_write(ring, 0x80000000);
3284 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3285 for (ext = sect->section; ext->extent != NULL; ++ext) {
3286 if (sect->id == SECT_CONTEXT) {
3287 amdgpu_ring_write(ring,
3288 PACKET3(PACKET3_SET_CONTEXT_REG,
3290 amdgpu_ring_write(ring,
3291 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3292 for (i = 0; i < ext->reg_count; i++)
3293 amdgpu_ring_write(ring, ext->extent[i]);
3298 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3299 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3301 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3302 amdgpu_ring_write(ring, 0);
3304 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3305 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3306 amdgpu_ring_write(ring, 0x8000);
3307 amdgpu_ring_write(ring, 0x8000);
3309 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3310 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3311 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3312 amdgpu_ring_write(ring, tmp);
3313 amdgpu_ring_write(ring, 0);
3315 amdgpu_ring_commit(ring);
/*
 * Resume the gfx ring buffer: program CP_RB0 size/pointers, the rptr
 * writeback address and wptr poll address, the ring base, and the
 * doorbell control/range registers, then start the ring and mark its
 * scheduler ready.
 */
3320 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3322 struct amdgpu_ring *ring;
3325 u64 rb_addr, rptr_addr, wptr_gpu_addr;
3327 /* Set the write pointer delay */
3328 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3330 /* set the RB to use vmid 0 */
3331 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3333 /* Set ring buffer size */
3334 ring = &adev->gfx.gfx_ring[0];
3335 rb_bufsz = order_base_2(ring->ring_size / 8);
3336 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3337 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
/* big-endian only (guarded by #ifdef in upstream; guard elided here) */
3339 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3341 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3343 /* Initialize the ring buffer's write pointers */
3345 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3346 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3348 /* set the wb address wether it's enabled or not */
3349 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3350 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3351 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3353 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3354 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3355 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
/* re-write CNTL after pointer setup (settle delay elided) */
3358 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3360 rb_addr = ring->gpu_addr >> 8;
3361 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3362 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3364 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3365 if (ring->use_doorbell) {
3366 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3367 DOORBELL_OFFSET, ring->doorbell_index);
3368 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3371 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3373 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3375 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3376 DOORBELL_RANGE_LOWER, ring->doorbell_index);
3377 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3379 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3380 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3383 /* start the ring */
3384 gfx_v9_0_cp_gfx_start(adev);
3385 ring->sched.ready = true;
/*
 * Enable/halt the compute MEC engines via CP_MEC_CNTL; on halt, also
 * mark the KIQ ring scheduler as not ready.
 */
3390 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3393 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3395 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3396 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3397 adev->gfx.kiq.ring.sched.ready = false;
/*
 * Legacy (non-PSP) MEC microcode load: halt compute, point the CPC
 * instruction cache at the MEC fw GPU address (vmid 0), then copy only
 * the MEC1 jump table through CP_MEC_ME1_UCODE_ADDR/DATA and write the
 * fw version.  MEC2 reuses the MEC1 image (see Todo below).
 * Returns -EINVAL when no MEC fw is loaded (early-return line elided).
 */
3402 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3404 const struct gfx_firmware_header_v1_0 *mec_hdr;
3405 const __le32 *fw_data;
3409 if (!adev->gfx.mec_fw)
3412 gfx_v9_0_cp_compute_enable(adev, false);
3414 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3415 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3417 fw_data = (const __le32 *)
3418 (adev->gfx.mec_fw->data +
3419 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3421 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3422 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3423 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3425 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3426 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3427 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3428 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
/* MEC1 jump table */
3431 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3432 mec_hdr->jt_offset);
3433 for (i = 0; i < mec_hdr->jt_size; i++)
3434 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3435 le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3437 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3438 adev->gfx.mec_fw_version);
3439 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
/*
 * Tell the RLC which queue is the KIQ by encoding me/pipe/queue into
 * RLC_CP_SCHEDULERS (two writes; the intermediate masking line is
 * elided in this extraction).
 */
3445 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3448 struct amdgpu_device *adev = ring->adev;
3450 /* tell RLC which is KIQ queue */
3451 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3453 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3454 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3456 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
/*
 * For compute rings flagged as high-priority, raise the MQD's pipe
 * priority to HIGH and queue priority to MAXIMUM; other rings keep the
 * MQD defaults.
 */
3459 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3461 struct amdgpu_device *adev = ring->adev;
3463 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3464 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3465 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3466 mqd->cp_hqd_queue_priority =
3467 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
/*
 * Fill in the v9 memory queue descriptor (MQD) for a compute/KIQ ring:
 * static header and thread-management masks, EOP buffer address/size,
 * doorbell control, MQD/HQD base addresses, PQ control, rptr writeback
 * and wptr poll addresses, vmid, persistent state, IB control, priority
 * and quantum.  Only the KIQ sets cp_hqd_active here — normal queues
 * are activated later by the map_queues packet.
 */
3472 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3474 struct amdgpu_device *adev = ring->adev;
3475 struct v9_mqd *mqd = ring->mqd_ptr;
3476 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3479 mqd->header = 0xC0310800;
3480 mqd->compute_pipelinestat_enable = 0x00000001;
3481 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3482 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3483 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3484 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3485 mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3486 mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3487 mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3488 mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3489 mqd->compute_misc_reserved = 0x00000003;
3491 mqd->dynamic_cu_mask_addr_lo =
3492 lower_32_bits(ring->mqd_gpu_addr
3493 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3494 mqd->dynamic_cu_mask_addr_hi =
3495 upper_32_bits(ring->mqd_gpu_addr
3496 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3498 eop_base_addr = ring->eop_gpu_addr >> 8;
3499 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3500 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3502 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3503 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3504 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3505 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3507 mqd->cp_hqd_eop_control = tmp;
3509 /* enable doorbell? */
3510 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3512 if (ring->use_doorbell) {
3513 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3514 DOORBELL_OFFSET, ring->doorbell_index);
3515 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3517 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3518 DOORBELL_SOURCE, 0);
3519 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3522 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3526 mqd->cp_hqd_pq_doorbell_control = tmp;
3528 /* disable the queue if it's active */
3530 mqd->cp_hqd_dequeue_request = 0;
3531 mqd->cp_hqd_pq_rptr = 0;
3532 mqd->cp_hqd_pq_wptr_lo = 0;
3533 mqd->cp_hqd_pq_wptr_hi = 0;
3535 /* set the pointer to the MQD */
3536 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3537 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3539 /* set MQD vmid to 0 */
3540 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3541 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3542 mqd->cp_mqd_control = tmp;
3544 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3545 hqd_gpu_addr = ring->gpu_addr >> 8;
3546 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3547 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3549 /* set up the HQD, this is similar to CP_RB0_CNTL */
3550 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3551 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3552 (order_base_2(ring->ring_size / 4) - 1));
3553 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3554 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
/* big-endian only (#ifdef guard elided in this extraction) */
3556 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3558 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3559 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3560 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3561 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3562 mqd->cp_hqd_pq_control = tmp;
3564 /* set the wb address whether it's enabled or not */
3565 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3566 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3567 mqd->cp_hqd_pq_rptr_report_addr_hi =
3568 upper_32_bits(wb_gpu_addr) & 0xffff;
3570 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3571 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3572 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3573 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3576 /* enable the doorbell if requested */
3577 if (ring->use_doorbell) {
3578 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3579 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3580 DOORBELL_OFFSET, ring->doorbell_index);
3582 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3584 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3585 DOORBELL_SOURCE, 0);
3586 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3590 mqd->cp_hqd_pq_doorbell_control = tmp;
3592 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3594 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3596 /* set the vmid for the queue */
3597 mqd->cp_hqd_vmid = 0;
3599 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3600 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3601 mqd->cp_hqd_persistent_state = tmp;
3603 /* set MIN_IB_AVAIL_SIZE */
3604 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3605 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3606 mqd->cp_hqd_ib_control = tmp;
3608 /* set static priority for a queue/ring */
3609 gfx_v9_0_mqd_set_priority(ring, mqd);
3610 mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3612 /* map_queues packet doesn't need activate the queue,
3613 * so only kiq need set this field.
3615 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3616 mqd->cp_hqd_active = 1;
/*
 * Commit a prepared MQD to the KIQ's HQD registers: disable wptr
 * polling, program EOP base/control, drain any active queue first
 * (bounded DEQUEUE_REQUEST poll), then write MQD base, PQ base/control,
 * rptr/wptr writeback and poll addresses, doorbell range (optionally
 * enlarged to the full doorbell BAR as a CGPG wakeup workaround — must
 * stay in sync with firmware), vmid, persistent state, and finally
 * activate the queue and enable the doorbell.
 */
3621 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3623 struct amdgpu_device *adev = ring->adev;
3624 struct v9_mqd *mqd = ring->mqd_ptr;
3627 /* disable wptr polling */
3628 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3630 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3631 mqd->cp_hqd_eop_base_addr_lo);
3632 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3633 mqd->cp_hqd_eop_base_addr_hi);
3635 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3636 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3637 mqd->cp_hqd_eop_control);
3639 /* enable doorbell? */
3640 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3641 mqd->cp_hqd_pq_doorbell_control);
3643 /* disable the queue if it's active */
3644 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3645 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3646 for (j = 0; j < adev->usec_timeout; j++) {
3647 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3651 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3652 mqd->cp_hqd_dequeue_request);
3653 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3654 mqd->cp_hqd_pq_rptr);
3655 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3656 mqd->cp_hqd_pq_wptr_lo);
3657 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3658 mqd->cp_hqd_pq_wptr_hi);
3661 /* set the pointer to the MQD */
3662 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3663 mqd->cp_mqd_base_addr_lo);
3664 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3665 mqd->cp_mqd_base_addr_hi);
3667 /* set MQD vmid to 0 */
3668 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3669 mqd->cp_mqd_control);
3671 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3672 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3673 mqd->cp_hqd_pq_base_lo);
3674 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3675 mqd->cp_hqd_pq_base_hi);
3677 /* set up the HQD, this is similar to CP_RB0_CNTL */
3678 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3679 mqd->cp_hqd_pq_control);
3681 /* set the wb address whether it's enabled or not */
3682 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3683 mqd->cp_hqd_pq_rptr_report_addr_lo);
3684 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3685 mqd->cp_hqd_pq_rptr_report_addr_hi);
3687 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3688 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3689 mqd->cp_hqd_pq_wptr_poll_addr_lo);
3690 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3691 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3693 /* enable the doorbell if requested */
3694 if (ring->use_doorbell) {
3695 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3696 (adev->doorbell_index.kiq * 2) << 2);
3697 /* If GC has entered CGPG, ringing doorbell > first page
3698 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3699 * workaround this issue. And this change has to align with firmware
3702 if (check_if_enlarge_doorbell_range(adev))
3703 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3704 (adev->doorbell.size - 4));
3706 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3707 (adev->doorbell_index.userqueue_end * 2) << 2);
3710 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3711 mqd->cp_hqd_pq_doorbell_control);
3713 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3714 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3715 mqd->cp_hqd_pq_wptr_lo);
3716 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3717 mqd->cp_hqd_pq_wptr_hi);
3719 /* set the vmid for the queue */
3720 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3722 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3723 mqd->cp_hqd_persistent_state);
3725 /* activate the queue */
3726 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3727 mqd->cp_hqd_active);
3729 if (ring->use_doorbell)
3730 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3735 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3737 struct amdgpu_device *adev = ring->adev;
3740 /* disable the queue if it's active */
3741 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3743 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3745 for (j = 0; j < adev->usec_timeout; j++) {
3746 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3751 if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3752 DRM_DEBUG("KIQ dequeue request failed.\n");
3754 /* Manual disable if dequeue request times out */
3755 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3758 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3762 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3763 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3764 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3765 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3766 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3767 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3768 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3769 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
/*
 * Initialize (or re-initialize after a GPU reset) the KIQ ring's MQD and
 * program it into the HQD registers.
 *
 * Two paths:
 *  - GPU reset with a valid MQD backup: restore the saved MQD, clear the
 *    ring buffer and just re-program the HQD registers.
 *  - Fresh init: zero the MQD allocation, build it with gfx_v9_0_mqd_init(),
 *    program the HQD, then snapshot it into mqd_backup for future resets.
 *
 * NOTE(review): extract appears truncated (missing braces / else keyword /
 * return statement) — verify against the upstream file.
 */
3774 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3776 struct amdgpu_device *adev = ring->adev;
3777 struct v9_mqd *mqd = ring->mqd_ptr;
/* KIQ backup lives one slot past the compute-ring MQD backups. */
3778 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3779 struct v9_mqd *tmp_mqd;
3781 gfx_v9_0_kiq_setting(ring);
3783 /* GPU could be in bad state during probe, driver trigger the reset
3784 * after load the SMU, in this case , the mqd is not be initialized.
3785 * driver need to re-init the mqd.
3786 * check mqd->cp_hqd_pq_control since this value should not be 0
3788 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3789 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
3790 /* for GPU_RESET case , reset MQD to a clean status */
3791 if (adev->gfx.mec.mqd_backup[mqd_idx])
3792 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3794 /* reset ring buffer */
3796 amdgpu_ring_clear_ring(ring);
/* Queue selection + HQD programming must be serialized via srbm_mutex. */
3798 mutex_lock(&adev->srbm_mutex);
3799 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3800 gfx_v9_0_kiq_init_register(ring);
3801 soc15_grbm_select(adev, 0, 0, 0, 0);
3802 mutex_unlock(&adev->srbm_mutex);
/* Fresh-init path: build the MQD from scratch. */
3804 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3805 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3806 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3807 mutex_lock(&adev->srbm_mutex);
3808 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3809 gfx_v9_0_mqd_init(ring);
3810 gfx_v9_0_kiq_init_register(ring);
3811 soc15_grbm_select(adev, 0, 0, 0, 0);
3812 mutex_unlock(&adev->srbm_mutex);
/* Snapshot the freshly built MQD so GPU reset can restore it later. */
3814 if (adev->gfx.mec.mqd_backup[mqd_idx])
3815 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
/*
 * Initialize a compute (KCQ) ring's MQD.
 *
 * Unlike the KIQ path, the HQD registers are not programmed directly here;
 * KCQ queues are mapped later via the KIQ (amdgpu_gfx_enable_kcq).
 *  - Fresh init (first load, or backup MQD invalid): zero and rebuild the
 *    MQD, then snapshot it into mqd_backup.
 *  - GPU reset: restore the MQD from backup, zero the wptr writeback slot
 *    and clear the ring buffer.
 *
 * NOTE(review): extract appears truncated (missing braces / final else
 * branch / return) — verify against the upstream file.
 */
3821 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3823 struct amdgpu_device *adev = ring->adev;
3824 struct v9_mqd *mqd = ring->mqd_ptr;
/* Index of this ring within the compute_ring array = its backup slot. */
3825 int mqd_idx = ring - &adev->gfx.compute_ring[0];
3826 struct v9_mqd *tmp_mqd;
3828 /* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control
3829 * is not be initialized before
3831 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3833 if (!tmp_mqd->cp_hqd_pq_control ||
3834 (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3835 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3836 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3837 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3838 mutex_lock(&adev->srbm_mutex);
3839 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3840 gfx_v9_0_mqd_init(ring);
3841 soc15_grbm_select(adev, 0, 0, 0, 0);
3842 mutex_unlock(&adev->srbm_mutex);
3844 if (adev->gfx.mec.mqd_backup[mqd_idx])
3845 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3846 } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3847 /* reset MQD to a clean status */
3848 if (adev->gfx.mec.mqd_backup[mqd_idx])
3849 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3851 /* reset ring buffer */
/* Clear the wptr writeback slot so the restored queue starts at 0. */
3853 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3854 amdgpu_ring_clear_ring(ring);
/* Presumably the remaining else branch (resume case) — TODO confirm. */
3856 amdgpu_ring_clear_ring(ring);
/*
 * Map the KIQ ring's MQD BO into the CPU address space, initialize the
 * queue, then unmap and mark the ring schedulable.
 *
 * NOTE(review): extract appears truncated (missing braces, error returns
 * and final return) — verify against the upstream file.
 */
3862 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3864 struct amdgpu_ring *ring;
3867 ring = &adev->gfx.kiq.ring;
/* Reserve + kmap the MQD buffer object for CPU writes. */
3869 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3870 if (unlikely(r != 0))
3873 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3874 if (unlikely(r != 0))
3877 gfx_v9_0_kiq_init_queue(ring);
/* Drop the CPU mapping; mqd_ptr must not outlive the kmap. */
3878 amdgpu_bo_kunmap(ring->mqd_obj);
3879 ring->mqd_ptr = NULL;
3880 amdgpu_bo_unreserve(ring->mqd_obj);
3881 ring->sched.ready = true;
/*
 * Resume all compute (KCQ) rings: enable the compute CP, build each ring's
 * MQD (map/init/unmap per ring), then map the queues through the KIQ via
 * amdgpu_gfx_enable_kcq().
 *
 * NOTE(review): extract appears truncated (missing braces, error handling
 * between calls and final return) — verify against the upstream file.
 */
3885 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3887 struct amdgpu_ring *ring = NULL;
3890 gfx_v9_0_cp_compute_enable(adev, true);
3892 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3893 ring = &adev->gfx.compute_ring[i];
/* Map this ring's MQD BO, fill it in, then unmap. */
3895 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3896 if (unlikely(r != 0))
3898 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3900 r = gfx_v9_0_kcq_init_queue(ring);
3901 amdgpu_bo_kunmap(ring->mqd_obj);
3902 ring->mqd_ptr = NULL;
3904 amdgpu_bo_unreserve(ring->mqd_obj);
/* Hand the prepared queues to the KIQ for hardware mapping. */
3909 r = amdgpu_gfx_enable_kcq(adev);
/*
 * Bring up the whole Command Processor: load CP microcode when not using
 * PSP front-door loading, resume KIQ, then GFX ring (if present), then
 * KCQs, and finally smoke-test each ring.
 *
 * NOTE(review): extract appears truncated (missing braces and the error
 * checks after each resume call) — verify against the upstream file.
 */
3914 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3917 struct amdgpu_ring *ring;
/* GUI idle interrupts are kept off during bring-up on dGPUs. */
3919 if (!(adev->flags & AMD_IS_APU))
3920 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3922 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3923 if (adev->gfx.num_gfx_rings) {
3924 /* legacy firmware loading */
3925 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3930 r = gfx_v9_0_cp_compute_load_microcode(adev);
/* KIQ must come up first: KCQ mapping below goes through it. */
3935 r = gfx_v9_0_kiq_resume(adev);
3939 if (adev->gfx.num_gfx_rings) {
3940 r = gfx_v9_0_cp_gfx_resume(adev);
3945 r = gfx_v9_0_kcq_resume(adev);
/* Ring tests: GFX ring result is checked; compute failures tolerated. */
3949 if (adev->gfx.num_gfx_rings) {
3950 ring = &adev->gfx.gfx_ring[0];
3951 r = amdgpu_ring_test_helper(ring);
3956 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3957 ring = &adev->gfx.compute_ring[i];
3958 amdgpu_ring_test_helper(ring);
3961 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
/*
 * Mirror the DF (data fabric) address-hashing status into TCP_ADDR_CONFIG
 * so the texture cache decodes addresses the same way the fabric hashes
 * them. Only applies to GC 9.4.1 (Arcturus) and 9.4.2 (Aldebaran).
 *
 * NOTE(review): extract appears truncated (missing braces / early return)
 * — verify against the upstream file.
 */
3966 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3970 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3971 adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
/* Read-modify-write: propagate 64K/2M/1G hash enables from DF status. */
3974 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3975 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3976 adev->df.hash_status.hash_64k);
3977 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3978 adev->df.hash_status.hash_2m);
3979 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3980 adev->df.hash_status.hash_1g);
3981 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
/*
 * Enable/disable both CP engines: the GFX engine (only when GFX rings
 * exist, e.g. not on compute-only ASICs) and the compute engine.
 */
3984 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3986 if (adev->gfx.num_gfx_rings)
3987 gfx_v9_0_cp_gfx_enable(adev, enable);
3988 gfx_v9_0_cp_compute_enable(adev, enable);
/*
 * IP-block hw_init callback: program golden registers (bare metal only —
 * the host owns them under SR-IOV), init constants and TCP config, resume
 * the RLC and the CP, and apply the 9.4.2 power-brake sequence.
 *
 * NOTE(review): extract appears truncated (missing braces, error checks
 * and final return) — verify against the upstream file.
 */
3991 static int gfx_v9_0_hw_init(void *handle)
3994 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3996 if (!amdgpu_sriov_vf(adev))
3997 gfx_v9_0_init_golden_registers(adev);
3999 gfx_v9_0_constants_init(adev);
4001 gfx_v9_0_init_tcp_config(adev);
/* RLC must be running before the CP is resumed. */
4003 r = adev->gfx.rlc.funcs->resume(adev);
4007 r = gfx_v9_0_cp_resume(adev);
4011 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4012 gfx_v9_4_2_set_power_brake_sequence(adev);
/*
 * IP-block hw_fini callback: release GFX interrupts, unmap KCQs (skipped
 * when a RAS fatal interrupt fired, since DF freeze/kcq disable would
 * fail), quiesce SR-IOV polling, run the CAIL-style KIQ deinit on a clean
 * unbind, stop the CP, and halt the RLC (skipped for A+A reset).
 *
 * NOTE(review): extract appears truncated (missing braces / returns) —
 * verify against the upstream file.
 */
4017 static int gfx_v9_0_hw_fini(void *handle)
4019 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4021 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4022 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4023 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4025 /* DF freeze and kcq disable will fail */
4026 if (!amdgpu_ras_intr_triggered())
4027 /* disable KCQ to avoid CPC touch memory not valid anymore */
4028 amdgpu_gfx_disable_kcq(adev);
4030 if (amdgpu_sriov_vf(adev)) {
4031 gfx_v9_0_cp_gfx_enable(adev, false);
4032 /* must disable polling for SRIOV when hw finished, otherwise
4033 * CPC engine may still keep fetching WB address which is already
4034 * invalid after sw finished and trigger DMAR reading error in
4037 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4041 /* Use deinitialize sequence from CAIL when unbinding device from driver,
4042 * otherwise KIQ is hanging when binding back
4044 if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
/* Select the KIQ queue under srbm_mutex before touching its HQD regs. */
4045 mutex_lock(&adev->srbm_mutex);
4046 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
4047 adev->gfx.kiq.ring.pipe,
4048 adev->gfx.kiq.ring.queue, 0);
4049 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
4050 soc15_grbm_select(adev, 0, 0, 0, 0);
4051 mutex_unlock(&adev->srbm_mutex);
4054 gfx_v9_0_cp_enable(adev, false);
4056 /* Skip suspend with A+A reset */
4057 if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) {
4058 dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n");
4062 adev->gfx.rlc.funcs->stop(adev);
/* IP-block suspend callback: suspend is identical to hw_fini for GFX v9. */
4066 static int gfx_v9_0_suspend(void *handle)
4068 return gfx_v9_0_hw_fini(handle);
/* IP-block resume callback: resume is identical to hw_init for GFX v9. */
4071 static int gfx_v9_0_resume(void *handle)
4073 return gfx_v9_0_hw_init(handle);
/*
 * IP-block is_idle callback: the GFX block is idle when GRBM_STATUS does
 * not report GUI_ACTIVE.
 *
 * NOTE(review): extract appears truncated (missing braces and the
 * true/false returns) — verify against the upstream file.
 */
4076 static bool gfx_v9_0_is_idle(void *handle)
4078 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4080 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4081 GRBM_STATUS, GUI_ACTIVE))
/*
 * IP-block wait_for_idle callback: poll gfx_v9_0_is_idle() up to
 * usec_timeout iterations.
 *
 * NOTE(review): extract appears truncated (missing braces, the per-
 * iteration delay and the success/timeout returns) — verify upstream.
 */
4087 static int gfx_v9_0_wait_for_idle(void *handle)
4090 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4092 for (i = 0; i < adev->usec_timeout; i++) {
4093 if (gfx_v9_0_is_idle(handle))
/*
 * IP-block soft_reset callback: inspect GRBM_STATUS/GRBM_STATUS2 to decide
 * which soft-reset bits (CP, GFX, RLC) are needed, quiesce the engines,
 * then pulse the selected bits in GRBM_SOFT_RESET (set, read back, clear,
 * read back).
 *
 * NOTE(review): extract appears truncated (missing braces, the udelay
 * between assert/deassert and the return) — verify against upstream.
 */
4100 static int gfx_v9_0_soft_reset(void *handle)
4102 u32 grbm_soft_reset = 0;
4104 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* Any busy graphics-pipeline unit => reset both CP and GFX. */
4107 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4108 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4109 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4110 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4111 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4112 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4113 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4114 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4115 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4116 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4117 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
/* CP-only busy => reset just the CP. */
4120 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4121 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4122 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
/* RLC busy => include the RLC in the reset mask. */
4126 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4127 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4128 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4129 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4132 if (grbm_soft_reset) {
/* Quiesce RLC and both CP engines before asserting reset. */
4134 adev->gfx.rlc.funcs->stop(adev);
4136 if (adev->gfx.num_gfx_rings)
4137 /* Disable GFX parsing/prefetching */
4138 gfx_v9_0_cp_gfx_enable(adev, false);
4140 /* Disable MEC parsing/prefetching */
4141 gfx_v9_0_cp_compute_enable(adev, false);
4143 if (grbm_soft_reset) {
/* Assert the soft-reset bits; the read-backs act as write fences. */
4144 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4145 tmp |= grbm_soft_reset;
4146 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4147 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4148 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
/* Deassert. */
4152 tmp &= ~grbm_soft_reset;
4153 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4154 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4157 /* Wait a little for things to settle down */
/*
 * Read the 64-bit GPU clock counter through the KIQ: emit a COPY_DATA
 * packet (register -> writeback memory), fence-poll for completion with a
 * bounded retry loop, then assemble the value from the two 32-bit
 * writeback slots. Used when direct register access is unavailable
 * (SR-IOV runtime, see gfx_v9_0_get_gpu_clock_counter).
 *
 * NOTE(review): extract appears truncated (missing goto labels' bodies,
 * register offset arguments in the address writes, and returns) — verify
 * against the upstream file.
 */
4163 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4165 signed long r, cnt = 0;
4166 unsigned long flags;
4167 uint32_t seq, reg_val_offs = 0;
4169 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4170 struct amdgpu_ring *ring = &kiq->ring;
4172 BUG_ON(!ring->funcs->emit_rreg);
/* Ring submission and wb slot allocation under the KIQ ring lock. */
4174 spin_lock_irqsave(&kiq->ring_lock, flags);
4175 if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4176 pr_err("critical bug! too many kiq readers\n");
4179 amdgpu_ring_alloc(ring, 32);
4180 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4181 amdgpu_ring_write(ring, 9 | /* src: register*/
4182 (5 << 8) | /* dst: memory */
4183 (1 << 16) | /* count sel */
4184 (1 << 20)); /* write confirm */
4185 amdgpu_ring_write(ring, 0);
4186 amdgpu_ring_write(ring, 0);
/* Destination: GPU address of the allocated writeback slot. */
4187 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4189 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4191 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4195 amdgpu_ring_commit(ring);
4196 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4198 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4200 /* don't wait anymore for gpu reset case because this way may
4201 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
4202 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4203 * never return if we keep waiting in virt_kiq_rreg, which cause
4204 * gpu_recover() hang there.
4206 * also don't wait anymore for IRQ context
4208 if (r < 1 && (amdgpu_in_reset(adev)))
4209 goto failed_kiq_read;
/* Retry with sleeps up to MAX_KIQ_REG_TRY before giving up. */
4212 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4213 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4214 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4217 if (cnt > MAX_KIQ_REG_TRY)
4218 goto failed_kiq_read;
/* Combine the low/high 32-bit halves from writeback memory. */
4221 value = (uint64_t)adev->wb.wb[reg_val_offs] |
4222 (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
4223 amdgpu_device_wb_free(adev, reg_val_offs);
/* Error paths: undo the ring allocation / free the wb slot. */
4227 amdgpu_ring_undo(ring);
4229 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4232 amdgpu_device_wb_free(adev, reg_val_offs);
4233 pr_err("failed to read gpu clock\n");
/*
 * Return the 64-bit GPU clock counter. GFXOFF is disabled and
 * gpu_clock_mutex held around the read so the latched LSB/MSB pair is
 * consistent. Vega10 (GC 9.0.1) under SR-IOV runtime must go through the
 * KIQ; otherwise latch via RLC_CAPTURE_GPU_CLOCK_COUNT and read the two
 * halves directly.
 *
 * NOTE(review): extract appears truncated (missing braces and the final
 * return of 'clock') — verify against the upstream file.
 */
4237 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4241 amdgpu_gfx_off_ctrl(adev, false);
4242 mutex_lock(&adev->gfx.gpu_clock_mutex);
4243 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4244 clock = gfx_v9_0_kiq_read_clock(adev);
/* Writing 1 latches the running counter into the LSB/MSB registers. */
4246 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4247 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4248 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4250 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4251 amdgpu_gfx_off_ctrl(adev, true);
/*
 * Emit ring packets that program the per-VMID GDS partition: GDS base and
 * size, GWS base/size, and the OA (ordered-append) allocation mask.
 *
 * NOTE(review): extract appears truncated (the vmid parameter line and
 * the gds_base/gds_size value arguments are not visible) — verify against
 * the upstream file.
 */
4255 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4257 uint32_t gds_base, uint32_t gds_size,
4258 uint32_t gws_base, uint32_t gws_size,
4259 uint32_t oa_base, uint32_t oa_size)
4261 struct amdgpu_device *adev = ring->adev;
/* GDS base/size registers are laid out in pairs per VMID (stride 2). */
4264 gfx_v9_0_write_data_to_reg(ring, 0, false,
4265 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4269 gfx_v9_0_write_data_to_reg(ring, 0, false,
4270 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
/* GWS: size in the SIZE field, base in the low bits. */
4274 gfx_v9_0_write_data_to_reg(ring, 0, false,
4275 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4276 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
/* OA: contiguous bitmask of oa_size bits starting at oa_base. */
4279 gfx_v9_0_write_data_to_reg(ring, 0, false,
4280 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4281 (1 << (oa_size + oa_base)) - (1 << oa_base));
/*
 * Precompiled GFX9 compute shader binary (raw ISA dwords) that zero-
 * initializes VGPRs; dispatched by the EDC GPR workaround to scrub
 * register-file ECC state. Do not edit by hand.
 */
4284 static const u32 vgpr_init_compute_shader[] =
4286 0xb07c0000, 0xbe8000ff,
4287 0x000000f8, 0xbf110800,
4288 0x7e000280, 0x7e020280,
4289 0x7e040280, 0x7e060280,
4290 0x7e080280, 0x7e0a0280,
4291 0x7e0c0280, 0x7e0e0280,
4292 0x80808800, 0xbe803200,
4293 0xbf84fff5, 0xbf9c0000,
4294 0xd28c0001, 0x0001007f,
4295 0xd28d0001, 0x0002027e,
4296 0x10020288, 0xb8810904,
4297 0xb7814000, 0xd1196a01,
4298 0x00000301, 0xbe800087,
4299 0xbefc00c1, 0xd89c4000,
4300 0x00020201, 0xd89cc080,
4301 0x00040401, 0x320202ff,
4302 0x00000800, 0x80808100,
4303 0xbf84fff8, 0x7e020280,
4304 0xbf810000, 0x00000000,
/*
 * Precompiled GFX9 compute shader binary (raw ISA dwords) that
 * initializes SGPRs for the EDC GPR workaround. Do not edit by hand.
 */
4307 static const u32 sgpr_init_compute_shader[] =
4309 0xb07c0000, 0xbe8000ff,
4310 0x0000005f, 0xbee50080,
4311 0xbe812c65, 0xbe822c65,
4312 0xbe832c65, 0xbe842c65,
4313 0xbe852c65, 0xb77c0005,
4314 0x80808500, 0xbf84fff8,
4315 0xbe800080, 0xbf810000,
/*
 * Arcturus (GC 9.4.1) variant of the VGPR-init shader: the long run of
 * v_accvgpr_write-style dwords initializes the 256 accumulation VGPRs
 * (ACC VGPRs) that Arcturus adds, followed by the regular VGPR scrub.
 * Raw ISA dwords — do not edit by hand.
 */
4318 static const u32 vgpr_init_compute_shader_arcturus[] = {
4319 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4320 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4321 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4322 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4323 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4324 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4325 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4326 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4327 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4328 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4329 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4330 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4331 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4332 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4333 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4334 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4335 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4336 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4337 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4338 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4339 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4340 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4341 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4342 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4343 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4344 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4345 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4346 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4347 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4348 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4349 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4350 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4351 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4352 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4353 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4354 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4355 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4356 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4357 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4358 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4359 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4360 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4361 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4362 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4363 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4364 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4365 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4366 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4367 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4368 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4369 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4370 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4371 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4372 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4373 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4374 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4375 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4376 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4377 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4378 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4379 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4380 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4381 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4382 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4383 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4384 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4385 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4386 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4387 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4388 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4389 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4390 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4391 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4392 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4393 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4394 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4395 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4396 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4397 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4398 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4399 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4400 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4401 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4402 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4403 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4404 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4405 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4406 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4407 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4408 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4409 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4410 0xbf84fff8, 0xbf810000,
4413 /* When below register arrays changed, please update gpr_reg_size,
4414 and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds,
4415 to cover all gfx9 ASICs */
/*
 * COMPUTE_* dispatch register values for the VGPR-init shader on non-
 * Arcturus gfx9: 64x4x1 threadgroup, RSRC1=0x3f (VGPR allocation),
 * 64KB LDS via RSRC2, all CUs on all SEs enabled.
 */
4416 static const struct soc15_reg_entry vgpr_init_regs[] = {
4417 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4418 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4419 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4420 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4421 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4422 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4423 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4424 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4425 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4426 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4427 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4428 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4429 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4430 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
/*
 * Arcturus variant of vgpr_init_regs: identical except RSRC1=0xbf, which
 * covers the larger (ACC) VGPR allocation the Arcturus shader touches.
 */
4433 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4434 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4435 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4436 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4437 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4438 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4439 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4440 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4441 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4442 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4443 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4444 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4445 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4446 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4447 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
/*
 * Dispatch register values for the first SGPR-init pass: 64x8x1 group,
 * RSRC1=0x240 (80 GPRs), CU mask 0x000000ff — i.e. the low 8 CUs of
 * each SE. sgpr2_init_regs below covers the next 8 CUs.
 */
4450 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4451 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4452 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4453 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4454 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4455 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4456 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4457 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4458 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4459 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4460 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4461 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4462 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4463 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4464 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
/*
 * Second SGPR-init pass: same dispatch parameters as sgpr1_init_regs but
 * with CU mask 0x0000ff00, targeting CUs 8-15 of each SE.
 */
4467 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4468 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4469 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4470 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4471 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4472 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4473 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4474 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4475 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4476 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4477 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4478 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4479 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4480 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4481 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
/*
 * EDC (error detection and correction) counter registers scanned by the
 * RAS error-count code. Trailing fields per entry are the SE-instance
 * count and per-SE instance count used to iterate every hardware
 * instance of the counter.
 */
4484 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4485 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4486 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4487 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4488 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4489 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4490 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4491 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4492 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4493 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4494 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4495 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4496 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4497 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4498 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4499 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4500 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4501 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4502 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4503 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4504 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4505 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4506 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4507 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4508 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4509 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4510 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4511 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4512 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4513 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4514 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4515 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4516 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4517 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
/*
 * RAS/EDC workaround: initialize GDS ECC state by DMA-filling the whole
 * GDS aperture through the first compute ring, then wait for the ring to
 * drain (rptr catches up to wptr). Only runs when GFX RAS is enabled.
 * Returns 0 on success, negative errno / -ETIMEDOUT style failures
 * otherwise.
 *
 * NOTE(review): extract appears truncated (missing braces, loop delay and
 * return statements) — verify against the upstream file.
 */
4520 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4522 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4525 /* only support when RAS is enabled */
4526 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4529 r = amdgpu_ring_alloc(ring, 7);
4531 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
/* Expose the full GDS aperture to VMID0 for the fill. */
4536 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4537 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
/* DMA_DATA: dst_sel=1 (GDS), src_sel=2 (immediate data), CP-synced. */
4539 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4540 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4541 PACKET3_DMA_DATA_DST_SEL(1) |
4542 PACKET3_DMA_DATA_SRC_SEL(2) |
4543 PACKET3_DMA_DATA_ENGINE(0)));
4544 amdgpu_ring_write(ring, 0);
4545 amdgpu_ring_write(ring, 0);
4546 amdgpu_ring_write(ring, 0);
4547 amdgpu_ring_write(ring, 0);
4548 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4549 adev->gds.gds_size);
4551 amdgpu_ring_commit(ring);
/* Busy-wait for the CP to consume the packet (rptr == wptr). */
4553 for (i = 0; i < adev->usec_timeout; i++) {
4554 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4559 if (i >= adev->usec_timeout)
/* Restore GDS_VMID0_SIZE to 0 now that the scrub is done. */
4562 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
/*
 * gfx_v9_0_do_edc_gpr_workarounds() - prime VGPR/SGPR banks for EDC.
 *
 * Builds one indirect buffer containing three compute dispatches: a
 * VGPR-init shader (chip-specific on GC 9.4.1/Arcturus) followed by two
 * SGPR-init dispatches, each preceded by its SET_SH_REG state and
 * followed by a CS partial-flush event.  The IB is scheduled on compute
 * ring 0 and the function waits for its fence.  Only runs when GFX RAS
 * is supported and the compute ring is ready.
 *
 * NOTE(review): braces and error-path lines are elided in this excerpt;
 * the overall flow is documented from the visible statements only.
 */
4567 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4569 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4570 struct amdgpu_ib ib;
4571 struct dma_fence *f = NULL;
4573 unsigned total_size, vgpr_offset, sgpr_offset;
/* one thread group per CU across all SEs/SHs */
4576 int compute_dim_x = adev->gfx.config.max_shader_engines *
4577 adev->gfx.config.max_cu_per_sh *
4578 adev->gfx.config.max_sh_per_se;
4579 int sgpr_work_group_size = 5;
4580 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4581 int vgpr_init_shader_size;
4582 const u32 *vgpr_init_shader_ptr;
4583 const struct soc15_reg_entry *vgpr_init_regs_ptr;
4585 /* only support when RAS is enabled */
4586 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4589 /* bail if the compute ring is not ready */
4590 if (!ring->sched.ready)
/* Arcturus (GC 9.4.1) uses its own VGPR init shader and register set */
4593 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4594 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4595 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4596 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4598 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4599 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4600 vgpr_init_regs_ptr = vgpr_init_regs;
/* size the IB: 3 dispatch sequences plus both shader payloads */
4604 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4606 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4608 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4609 total_size = ALIGN(total_size, 256);
4610 vgpr_offset = total_size;
4611 total_size += ALIGN(vgpr_init_shader_size, 256);
4612 sgpr_offset = total_size;
4613 total_size += sizeof(sgpr_init_compute_shader);
4615 /* allocate an indirect buffer to put the commands in */
4616 memset(&ib, 0, sizeof(ib));
4617 r = amdgpu_ib_get(adev, NULL, total_size,
4618 AMDGPU_IB_POOL_DIRECT, &ib);
4620 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4624 /* load the compute shaders */
4625 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4626 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4628 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4629 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4631 /* init the ib length to 0 */
/* ---- dispatch 1: VGPR init ---- */
4635 /* write the register state for the compute dispatch */
4636 for (i = 0; i < gpr_reg_size; i++) {
4637 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4638 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4639 - PACKET3_SET_SH_REG_START;
4640 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4642 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4643 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4644 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4645 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4646 - PACKET3_SET_SH_REG_START;
4647 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4648 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4650 /* write dispatch packet */
4651 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4652 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4653 ib.ptr[ib.length_dw++] = 1; /* y */
4654 ib.ptr[ib.length_dw++] = 1; /* z */
4655 ib.ptr[ib.length_dw++] =
4656 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4658 /* write CS partial flush packet */
4659 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4660 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- dispatch 2: first SGPR init pass ---- */
4663 /* write the register state for the compute dispatch */
4664 for (i = 0; i < gpr_reg_size; i++) {
4665 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4666 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4667 - PACKET3_SET_SH_REG_START;
4668 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4670 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4671 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4672 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4673 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4674 - PACKET3_SET_SH_REG_START;
4675 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4676 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4678 /* write dispatch packet */
4679 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4680 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4681 ib.ptr[ib.length_dw++] = 1; /* y */
4682 ib.ptr[ib.length_dw++] = 1; /* z */
4683 ib.ptr[ib.length_dw++] =
4684 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4686 /* write CS partial flush packet */
4687 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4688 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- dispatch 3: second SGPR init pass (same shader, other bank set) ---- */
4691 /* write the register state for the compute dispatch */
4692 for (i = 0; i < gpr_reg_size; i++) {
4693 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4694 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4695 - PACKET3_SET_SH_REG_START;
4696 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4698 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4699 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4700 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4701 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4702 - PACKET3_SET_SH_REG_START;
4703 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4704 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4706 /* write dispatch packet */
4707 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4708 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4709 ib.ptr[ib.length_dw++] = 1; /* y */
4710 ib.ptr[ib.length_dw++] = 1; /* z */
4711 ib.ptr[ib.length_dw++] =
4712 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4714 /* write CS partial flush packet */
4715 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4716 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4718 /* schedule the ib on the ring */
4719 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4721 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4725 /* wait for the GPU to finish processing the IB */
4726 r = dma_fence_wait(f, false);
4728 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4733 amdgpu_ib_free(adev, &ib, NULL);
/*
 * gfx_v9_0_early_init() - early IP-block init: ring counts and vtables.
 *
 * GC 9.4.1/9.4.2 parts get zero gfx rings (compute-only per this code);
 * everything else gets GFX9_NUM_GFX_RINGS.  Compute ring count is the
 * requested KCQ count clamped to AMDGPU_MAX_COMPUTE_RINGS.  Then the
 * KIQ PM4, ring, IRQ, GDS and RLC function tables are installed.
 */
4739 static int gfx_v9_0_early_init(void *handle)
4741 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4743 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4744 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4745 adev->gfx.num_gfx_rings = 0;
4747 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4748 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4749 AMDGPU_MAX_COMPUTE_RINGS);
4750 gfx_v9_0_set_kiq_pm4_funcs(adev);
4751 gfx_v9_0_set_ring_funcs(adev);
4752 gfx_v9_0_set_irq_funcs(adev);
4753 gfx_v9_0_set_gds_init(adev);
4754 gfx_v9_0_set_rlc_funcs(adev);
/*
 * gfx_v9_0_ecc_late_init() - run ECC/EDC workarounds and RAS late init.
 *
 * Order: clear GDS (cold boot only, see workaround note below), run the
 * GPR init workaround (9.4.2 has its own variant), then RAS late init
 * and, if provided, the RAS watchdog-timer enable hook.
 */
4759 static int gfx_v9_0_ecc_late_init(void *handle)
4761 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4765 * Temp workaround to fix the issue that CP firmware fails to
4766 * update read pointer when CPDMA is writing clearing operation
4767 * to GDS in suspend/resume sequence on several cards. So just
4768 * limit this operation in cold boot sequence.
4770 if ((!adev->in_suspend) &&
4771 (adev->gds.gds_size)) {
4772 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4777 /* requires IBs so do in late init after IB pool is initialized */
4778 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4779 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4781 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
/* both hooks are optional; guard before each call */
4786 if (adev->gfx.ras_funcs &&
4787 adev->gfx.ras_funcs->ras_late_init) {
4788 r = adev->gfx.ras_funcs->ras_late_init(adev);
4793 if (adev->gfx.ras_funcs &&
4794 adev->gfx.ras_funcs->enable_watchdog_timer)
4795 adev->gfx.ras_funcs->enable_watchdog_timer(adev);
/*
 * gfx_v9_0_late_init() - enable priv reg/inst fault interrupts, then
 * chain into the ECC late-init path above.
 */
4800 static int gfx_v9_0_late_init(void *handle)
4802 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4805 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4809 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4813 r = gfx_v9_0_ecc_late_init(handle);
/*
 * gfx_v9_0_is_rlc_enabled() - report whether the RLC F32 core is enabled,
 * by testing RLC_CNTL.RLC_ENABLE_F32.
 */
4820 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4822 uint32_t rlc_setting;
4824 /* if RLC is not enabled, do nothing */
4825 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4826 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
/*
 * gfx_v9_0_set_safe_mode() - request RLC safe mode (MESSAGE=1 + CMD),
 * then poll RLC_SAFE_MODE.CMD until the RLC acknowledges (bit clears)
 * or adev->usec_timeout iterations elapse.
 */
4832 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4837 data = RLC_SAFE_MODE__CMD_MASK;
4838 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4839 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4841 /* wait for RLC_SAFE_MODE */
4842 for (i = 0; i < adev->usec_timeout; i++) {
4843 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
/*
 * gfx_v9_0_unset_safe_mode() - leave RLC safe mode: write CMD with
 * MESSAGE left at 0 (exit request); no acknowledge poll on this path.
 */
4849 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4853 data = RLC_SAFE_MODE__CMD_MASK;
4854 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
/*
 * gfx_v9_0_update_gfx_cg_power_gating() - toggle GFX CG power gating
 * (and pipeline PG when supported), bracketed by RLC safe mode so the
 * RLC is quiesced while the PG controls are flipped.
 */
4857 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4860 amdgpu_gfx_rlc_enter_safe_mode(adev);
4862 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4863 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4864 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4865 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4867 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4868 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4869 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4872 amdgpu_gfx_rlc_exit_safe_mode(adev);
/*
 * gfx_v9_0_update_gfx_mg_power_gating() - toggle static (SMG) and
 * dynamic (DMG) medium-grain power gating according to pg_flags.
 * Deliberately not wrapped in RLC safe mode (see TODO below).
 */
4875 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4878 /* TODO: double check if we need to perform under safe mode */
4879 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4881 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4882 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4884 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4886 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4887 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4889 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4891 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
/*
 * gfx_v9_0_update_medium_grain_clock_gating() - enable/disable MGCG and
 * the MGLS (memory light sleep) sub-features.
 *
 * Enable path: clear the relevant RLC_CGTT_MGCG_OVERRIDE bits (CPF bit
 * skipped on GC 9.2.1), then turn on RLC and CP memory light sleep if
 * the corresponding cg_flags are set.  Disable path mirrors it: set the
 * override bits and clear the two LS enables.  All register writes are
 * conditional on the value actually changing, and the whole sequence
 * runs under RLC safe mode.
 */
4894 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4899 amdgpu_gfx_rlc_enter_safe_mode(adev);
4901 /* It is disabled by HW by default */
4902 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4903 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4904 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4906 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4907 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4909 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4910 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4911 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4913 /* only for Vega10 & Raven1 */
4914 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4917 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4919 /* MGLS is a global flag to control all MGLS in GFX */
4920 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4921 /* 2 - RLC memory Light sleep */
4922 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4923 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4924 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4926 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4928 /* 3 - CP memory Light sleep */
4929 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4930 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4931 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4933 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
/* ---- disable path: force the overrides back on, drop LS enables ---- */
4937 /* 1 - MGCG_OVERRIDE */
4938 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4940 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4941 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4943 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4944 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4945 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4946 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4949 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4951 /* 2 - disable MGLS in RLC */
4952 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4953 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4954 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4955 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4958 /* 3 - disable MGLS in CP */
4959 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4960 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4961 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4962 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4966 amdgpu_gfx_rlc_exit_safe_mode(adev);
/*
 * gfx_v9_0_update_3d_clock_gating() - program 3D-pipe CGCG/CGLS.
 *
 * No-op on compute-only parts (no gfx rings).  Enable: clear the 3D CG
 * override, program the 3D CGCG FSM (idle threshold 0x36, optional CGLS
 * with 0xF compensation delay -> 0x363f) and set the CP idle-poll count
 * to 0x00900100.  Disable: clear CGCG_EN/CGLS_EN in the 3D FSM control.
 * Runs under RLC safe mode.
 */
4969 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4974 if (!adev->gfx.num_gfx_rings)
4977 amdgpu_gfx_rlc_enter_safe_mode(adev);
4979 /* Enable 3D CGCG/CGLS */
4981 /* write cmd to clear cgcg/cgls ov */
4982 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4983 /* unset CGCG override */
4984 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4985 /* update CGCG and CGLS override bits */
4987 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4989 /* enable 3Dcgcg FSM(0x0000363f) */
4990 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4992 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4993 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4994 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4996 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4998 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4999 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5000 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5002 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5004 /* set IDLE_POLL_COUNT(0x00900100) */
5005 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5006 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5007 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5009 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5011 /* Disable CGCG/CGLS */
5012 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5013 /* disable cgcg, cgls should be disabled */
5014 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5015 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5016 /* disable cgcg and cgls in FSM */
5018 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5021 amdgpu_gfx_rlc_exit_safe_mode(adev);
/*
 * gfx_v9_0_update_coarse_grain_clock_gating() - program GFX CGCG/CGLS.
 *
 * Enable: drop the CGCG (and, if supported, CGLS) override bits, program
 * the CGCG FSM — GC 9.4.1 (Arcturus) uses a larger idle threshold of
 * 0x2000, others 0x36 — and set the CP idle-poll count.  Disable: clear
 * CGCG_EN/CGLS_EN in RLC_CGCG_CGLS_CTRL.  Runs under RLC safe mode and
 * only writes registers whose value actually changed.
 */
5024 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5029 amdgpu_gfx_rlc_enter_safe_mode(adev);
5031 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5032 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5033 /* unset CGCG override */
5034 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5035 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5036 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5038 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5039 /* update CGCG and CGLS override bits */
5041 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5043 /* enable cgcg FSM(0x0000363F) */
5044 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5046 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
5047 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5048 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5050 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5051 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5052 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5053 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5054 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5056 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5058 /* set IDLE_POLL_COUNT(0x00900100) */
5059 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5060 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5061 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5063 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5065 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5066 /* reset CGCG/CGLS bits */
5067 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5068 /* disable cgcg and cgls in FSM */
5070 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5073 amdgpu_gfx_rlc_exit_safe_mode(adev);
/*
 * gfx_v9_0_update_gfx_clock_gating() - apply all GFX clock-gating stages
 * in the hardware-required order: MGCG before CGCG when enabling, and
 * the exact reverse when disabling.
 */
5076 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5080 /* CGCG/CGLS should be enabled after MGCG/MGLS
5081 * === MGCG + MGLS ===
5083 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5084 /* === CGCG /CGLS for GFX 3D Only === */
5085 gfx_v9_0_update_3d_clock_gating(adev, enable);
5086 /* === CGCG + CGLS === */
5087 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5089 /* CGCG/CGLS should be disabled before MGCG/MGLS
5090 * === CGCG + CGLS ===
5092 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5093 /* === CGCG /CGLS for GFX 3D Only === */
5094 gfx_v9_0_update_3d_clock_gating(adev, enable);
5095 /* === MGCG + MGLS === */
5096 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
/*
 * gfx_v9_0_update_spm_vmid() - select the VMID whose work the SPM
 * (streaming performance monitor) traces, via RLC_SPM_MC_CNTL.
 * Under SR-IOV one-VF-per-PP mode the register is accessed with the
 * NO_KIQ variants (direct MMIO rather than going through the KIQ).
 */
5101 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5105 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5106 if (amdgpu_sriov_is_pp_one_vf(adev))
5107 data = RREG32_NO_KIQ(reg);
/* replace only the RLC_SPM_VMID field, preserving the rest */
5111 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5112 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5114 if (amdgpu_sriov_is_pp_one_vf(adev))
5115 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5117 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
/*
 * gfx_v9_0_check_rlcg_range() - scan a table of RLCG-interface register
 * entries, resolving each entry's absolute offset from the per-HWIP
 * reg_offset tables, to decide whether a given offset must be accessed
 * through the RLCG path.  (Comparison/return lines are elided in this
 * excerpt.)
 */
5120 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5122 struct soc15_reg_rlcg *entries, int arr_size)
5130 for (i = 0; i < arr_size; i++) {
5131 const struct soc15_reg_rlcg *entry;
5133 entry = &entries[i];
5134 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
/*
 * gfx_v9_0_is_rlcg_access_range() - thin wrapper: is @offset in the
 * GC 9.0 RLCG-access table?
 */
5142 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5144 return gfx_v9_0_check_rlcg_range(adev, offset,
5145 (void *)rlcg_access_gc_9_0,
5146 ARRAY_SIZE(rlcg_access_gc_9_0));
/* RLC callback table for GFX v9.0, installed by gfx_v9_0_set_rlc_funcs(). */
5149 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5150 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5151 .set_safe_mode = gfx_v9_0_set_safe_mode,
5152 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5153 .init = gfx_v9_0_rlc_init,
5154 .get_csb_size = gfx_v9_0_get_csb_size,
5155 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5156 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5157 .resume = gfx_v9_0_rlc_resume,
5158 .stop = gfx_v9_0_rlc_stop,
5159 .reset = gfx_v9_0_rlc_reset,
5160 .start = gfx_v9_0_rlc_start,
5161 .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5162 .sriov_wreg = gfx_v9_0_sriov_wreg,
5163 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
/*
 * gfx_v9_0_set_powergating_state() - IP-block powergating entry point.
 *
 * Raven/Raven2/Renoir-class parts (GC 9.2.2 / 9.1.0 / 9.3.0) program the
 * full PG sequence with GFXOFF temporarily disabled around it; Vega12
 * (GC 9.2.1) only toggles GFXOFF.  Other IP versions fall through (no
 * visible action in this excerpt).
 */
5166 static int gfx_v9_0_set_powergating_state(void *handle,
5167 enum amd_powergating_state state)
5169 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5170 bool enable = (state == AMD_PG_STATE_GATE);
5172 switch (adev->ip_versions[GC_HWIP][0]) {
5173 case IP_VERSION(9, 2, 2):
5174 case IP_VERSION(9, 1, 0):
5175 case IP_VERSION(9, 3, 0):
/* keep GFXOFF off while reprogramming PG controls */
5177 amdgpu_gfx_off_ctrl(adev, false);
5179 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5180 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5181 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5183 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5184 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5187 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5188 gfx_v9_0_enable_cp_power_gating(adev, true);
5190 gfx_v9_0_enable_cp_power_gating(adev, false);
5192 /* update gfx cgpg state */
5193 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5195 /* update mgcg state */
5196 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5199 amdgpu_gfx_off_ctrl(adev, true);
5201 case IP_VERSION(9, 2, 1):
5202 amdgpu_gfx_off_ctrl(adev, enable);
/*
 * gfx_v9_0_set_clockgating_state() - IP-block clockgating entry point.
 * Skipped entirely under SR-IOV (the host owns CG); otherwise every
 * listed GC 9.x version funnels into gfx_v9_0_update_gfx_clock_gating().
 */
5211 static int gfx_v9_0_set_clockgating_state(void *handle,
5212 enum amd_clockgating_state state)
5214 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5216 if (amdgpu_sriov_vf(adev))
5219 switch (adev->ip_versions[GC_HWIP][0]) {
5220 case IP_VERSION(9, 0, 1):
5221 case IP_VERSION(9, 2, 1):
5222 case IP_VERSION(9, 4, 0):
5223 case IP_VERSION(9, 2, 2):
5224 case IP_VERSION(9, 1, 0):
5225 case IP_VERSION(9, 4, 1):
5226 case IP_VERSION(9, 3, 0):
5227 case IP_VERSION(9, 4, 2):
5228 gfx_v9_0_update_gfx_clock_gating(adev,
5229 state == AMD_CG_STATE_GATE);
/*
 * gfx_v9_0_get_clockgating_state() - report which CG features are
 * currently active by reading the relevant RLC/CP registers (via KIQ)
 * and OR-ing the matching AMD_CG_SUPPORT_* bits into *flags.
 * Skipped under SR-IOV.  The 3D CGCG/CGLS registers are not read on
 * GC 9.4.1 (no 3D pipe state reported there).
 */
5237 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5239 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5242 if (amdgpu_sriov_vf(adev))
5245 /* AMD_CG_SUPPORT_GFX_MGCG */
5246 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
/* MGCG is active when its override bit is clear */
5247 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5248 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5250 /* AMD_CG_SUPPORT_GFX_CGCG */
5251 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5252 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5253 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5255 /* AMD_CG_SUPPORT_GFX_CGLS */
5256 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5257 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5259 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5260 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5261 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5262 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5264 /* AMD_CG_SUPPORT_GFX_CP_LS */
5265 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5266 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5267 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5269 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5270 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5271 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5272 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5273 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5275 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5276 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5277 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
/* Read the gfx ring's read pointer from its writeback slot. */
5281 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5283 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
/*
 * gfx_v9_0_ring_get_wptr_gfx() - read the gfx ring write pointer:
 * 64-bit atomic from the writeback slot when doorbells are in use,
 * otherwise assembled from the CP_RB0_WPTR/_HI register pair.
 */
5286 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5288 struct amdgpu_device *adev = ring->adev;
5291 /* XXX check if swapping is necessary on BE */
5292 if (ring->use_doorbell) {
5293 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5295 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5296 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
/*
 * gfx_v9_0_ring_set_wptr_gfx() - publish the gfx ring write pointer:
 * via writeback slot + doorbell when doorbells are in use, otherwise by
 * writing the CP_RB0_WPTR/_HI register pair directly.
 */
5302 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5304 struct amdgpu_device *adev = ring->adev;
5306 if (ring->use_doorbell) {
5307 /* XXX check if swapping is necessary on BE */
5308 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5309 WDOORBELL64(ring->doorbell_index, ring->wptr);
5311 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5312 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
/*
 * gfx_v9_0_ring_emit_hdp_flush() - emit an HDP flush request and wait
 * for its completion via WAIT_REG_MEM on the NBIO flush req/done pair.
 * Compute rings pick their per-pipe ref/mask bit; gfx uses cp0 with the
 * PFP engine.  (Some pipe-selection lines are elided in this excerpt.)
 */
5316 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5318 struct amdgpu_device *adev = ring->adev;
5319 u32 ref_and_mask, reg_mem_engine;
5320 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5322 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5325 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5328 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5335 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5336 reg_mem_engine = 1; /* pfp */
5339 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5340 adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5341 adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5342 ref_and_mask, ref_and_mask, 0x20);
/*
 * gfx_v9_0_ring_emit_ib_gfx() - emit an INDIRECT_BUFFER packet on the
 * gfx ring.  CE IBs use INDIRECT_BUFFER_CONST; preemptible SR-IOV IBs
 * get the PRE_ENB bit, and non-CE VMID-tagged IBs are preceded by the
 * DE metadata packet.  Control word packs length_dw and the VMID.
 */
5345 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5346 struct amdgpu_job *job,
5347 struct amdgpu_ib *ib,
5350 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5351 u32 header, control = 0;
5353 if (ib->flags & AMDGPU_IB_FLAG_CE)
5354 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5356 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5358 control |= ib->length_dw | (vmid << 24);
5360 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5361 control |= INDIRECT_BUFFER_PRE_ENB(1);
5363 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5364 gfx_v9_0_ring_emit_de_meta(ring);
5367 amdgpu_ring_write(ring, header);
5368 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5369 amdgpu_ring_write(ring,
5373 lower_32_bits(ib->gpu_addr));
5374 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5375 amdgpu_ring_write(ring, control);
/*
 * gfx_v9_0_ring_emit_ib_compute() - emit an INDIRECT_BUFFER packet on a
 * compute ring.  Optionally resets the ME/GDS wave-ID counters first
 * (hardware workaround documented inline below).
 */
5378 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5379 struct amdgpu_job *job,
5380 struct amdgpu_ib *ib,
5383 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5384 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5386 /* Currently, there is a high possibility to get wave ID mismatch
5387 * between ME and GDS, leading to a hw deadlock, because ME generates
5388 * different wave IDs than the GDS expects. This situation happens
5389 * randomly when at least 5 compute pipes use GDS ordered append.
5390 * The wave IDs generated by ME are also wrong after suspend/resume.
5391 * Those are probably bugs somewhere else in the kernel driver.
5393 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5394 * GDS to 0 for this ring (me/pipe).
5396 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5397 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5398 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5399 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5402 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5403 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5404 amdgpu_ring_write(ring,
5408 lower_32_bits(ib->gpu_addr));
5409 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5410 amdgpu_ring_write(ring, control);
/*
 * gfx_v9_0_ring_emit_fence() - emit a RELEASE_MEM packet that flushes
 * caches (TC writeback-only or full TCL1+TC invalidate depending on
 * AMDGPU_FENCE_FLAG_TC_WB_ONLY), writes the fence @seq to @addr
 * (32- or 64-bit per FLAG_64BIT) and optionally raises an interrupt
 * (FLAG_INT).
 */
5413 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5414 u64 seq, unsigned flags)
5416 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5417 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5418 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5420 /* RELEASE_MEM - flush caches, send int */
5421 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5422 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5423 EOP_TC_NC_ACTION_EN) :
5424 (EOP_TCL1_ACTION_EN |
5426 EOP_TC_WB_ACTION_EN |
5427 EOP_TC_MD_ACTION_EN)) |
5428 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5430 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5433 * the address should be Qword aligned if 64bit write, Dword
5434 * aligned if only send 32bit data low (discard data high)
5440 amdgpu_ring_write(ring, lower_32_bits(addr));
5441 amdgpu_ring_write(ring, upper_32_bits(addr));
5442 amdgpu_ring_write(ring, lower_32_bits(seq));
5443 amdgpu_ring_write(ring, upper_32_bits(seq));
5444 amdgpu_ring_write(ring, 0);
/*
 * gfx_v9_0_ring_emit_pipeline_sync() - WAIT_REG_MEM on the ring's own
 * fence address until the latest sync_seq lands; gfx rings wait on the
 * PFP engine, compute rings on ME.
 */
5447 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5449 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5450 uint32_t seq = ring->fence_drv.sync_seq;
5451 uint64_t addr = ring->fence_drv.gpu_addr;
5453 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5454 lower_32_bits(addr), upper_32_bits(addr),
5455 seq, 0xffffffff, 4);
/*
 * gfx_v9_0_ring_emit_vm_flush() - emit the common GMC TLB flush, then
 * on gfx rings add a PFP_SYNC_ME so the PFP does not read stale
 * translations ahead of the ME.
 */
5458 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5459 unsigned vmid, uint64_t pd_addr)
5461 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5463 /* compute doesn't have PFP */
5464 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5465 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5466 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5467 amdgpu_ring_write(ring, 0x0);
/* Read a compute ring's read pointer from its writeback slot. */
5471 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5473 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
/*
 * gfx_v9_0_ring_get_wptr_compute() - read a compute ring's write pointer
 * from the 64-bit writeback slot (doorbell mode only in this excerpt).
 */
5476 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5480 /* XXX check if swapping is necessary on BE */
5481 if (ring->use_doorbell)
5482 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
/*
 * gfx_v9_0_ring_set_wptr_compute() - publish a compute ring's write
 * pointer via writeback slot + doorbell; any non-doorbell path is a
 * hard bug on gfx9 (BUG()).
 */
5488 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5490 struct amdgpu_device *adev = ring->adev;
5492 /* XXX check if swapping is necessary on BE */
5493 if (ring->use_doorbell) {
5494 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5495 WDOORBELL64(ring->doorbell_index, ring->wptr);
5497 BUG(); /* only DOORBELL method supported on gfx9 now */
/*
 * gfx_v9_0_ring_emit_fence_kiq() - KIQ fence: WRITE_DATA of the 32-bit
 * @seq to @addr (64-bit fences are not supported here, see BUG_ON),
 * optionally followed by a write to CPC_INT_STATUS to fire the
 * interrupt.
 */
5501 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5502 u64 seq, unsigned int flags)
5504 struct amdgpu_device *adev = ring->adev;
5506 /* we only allocate 32bit for each seq wb address */
5507 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5509 /* write fence seq to the "addr" */
5510 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5511 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5512 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5513 amdgpu_ring_write(ring, lower_32_bits(addr));
5514 amdgpu_ring_write(ring, upper_32_bits(addr));
5515 amdgpu_ring_write(ring, lower_32_bits(seq));
5517 if (flags & AMDGPU_FENCE_FLAG_INT) {
5518 /* set register to trigger INT */
5519 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5520 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5521 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5522 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5523 amdgpu_ring_write(ring, 0);
5524 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
/* Emit a SWITCH_BUFFER packet (two dwords, payload 0). */
5528 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5530 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5531 amdgpu_ring_write(ring, 0);
/*
 * gfx_v9_0_ring_emit_ce_meta() - WRITE_DATA a zeroed v9_ce_ib_state CE
 * payload into the CSA at the ce_payload offset (dst_sel=8, CE engine).
 */
5534 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5536 struct v9_ce_ib_state ce_payload = {0};
/* packet length: payload dwords + 4 header dwords - 2 (PACKET3 bias) */
5540 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5541 csa_addr = amdgpu_csa_vaddr(ring->adev);
5543 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5544 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5545 WRITE_DATA_DST_SEL(8) |
5547 WRITE_DATA_CACHE_POLICY(0));
5548 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5549 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5550 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
/*
 * gfx_v9_0_ring_emit_de_meta() - WRITE_DATA a v9_de_ib_state DE payload
 * into the CSA at the de_payload offset.  The GDS backup address is set
 * to CSA + 4096 (one page past the CSA base).
 */
5553 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5555 struct v9_de_ib_state de_payload = {0};
5556 uint64_t csa_addr, gds_addr;
5559 csa_addr = amdgpu_csa_vaddr(ring->adev);
5560 gds_addr = csa_addr + 4096;
5561 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5562 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
/* packet length: payload dwords + 4 header dwords - 2 (PACKET3 bias) */
5564 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5565 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5566 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5567 WRITE_DATA_DST_SEL(8) |
5569 WRITE_DATA_CACHE_POLICY(0));
5570 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5571 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5572 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
/*
 * gfx_v9_0_ring_emit_frame_cntl() - FRAME_CONTROL packet marking frame
 * begin (cmd 0) or end (cmd 1), with the TMZ bit set for secure frames.
 */
5575 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5578 uint32_t v = secure ? FRAME_TMZ : 0;
5580 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5581 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
/*
 * gfx_v9_ring_emit_cntxcntl() - emit CONTEXT_CONTROL.  Under SR-IOV the
 * CE metadata is written first.  The dw2 load bits depend on the flags:
 * context-switch IBs request global/per-context state loads, preamble
 * IBs additionally load CE RAM.  (Some bit-set lines are elided in this
 * excerpt.)
 */
5584 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5588 if (amdgpu_sriov_vf(ring->adev))
5589 gfx_v9_0_ring_emit_ce_meta(ring);
5591 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5592 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5593 /* set load_global_config & load_global_uconfig */
5595 /* set load_cs_sh_regs */
5597 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5600 /* set load_ce_ram if preamble presented */
5601 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5604 /* still load_ce_ram if this is the first time preamble presented
5605 * although there is no context switch happens.
5607 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5611 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5612 amdgpu_ring_write(ring, dw2);
5613 amdgpu_ring_write(ring, 0);
/*
 * gfx_v9_0_ring_emit_init_cond_exec() - emit a COND_EXEC packet whose
 * dword-count slot is filled with a placeholder (0x55aa55aa); returns
 * the ring offset of that slot so emit_patch_cond_exec() can fix it up
 * once the guarded packet count is known.
 */
5616 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5619 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5620 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5621 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5622 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5623 ret = ring->wptr & ring->buf_mask;
5624 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5628 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5631 BUG_ON(offset > ring->buf_mask);
5632 BUG_ON(ring->ring[offset] != 0x55aa55aa);
5634 cur = (ring->wptr & ring->buf_mask) - 1;
5635 if (likely(cur > offset))
5636 ring->ring[offset] = cur - offset;
5638 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5641 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5642 uint32_t reg_val_offs)
5644 struct amdgpu_device *adev = ring->adev;
5646 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5647 amdgpu_ring_write(ring, 0 | /* src: register*/
5648 (5 << 8) | /* dst: memory */
5649 (1 << 20)); /* write confirm */
5650 amdgpu_ring_write(ring, reg);
5651 amdgpu_ring_write(ring, 0);
5652 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5654 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5658 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5663 switch (ring->funcs->type) {
5664 case AMDGPU_RING_TYPE_GFX:
5665 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5667 case AMDGPU_RING_TYPE_KIQ:
5668 cmd = (1 << 16); /* no inc addr */
5674 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5675 amdgpu_ring_write(ring, cmd);
5676 amdgpu_ring_write(ring, reg);
5677 amdgpu_ring_write(ring, 0);
5678 amdgpu_ring_write(ring, val);
5681 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5682 uint32_t val, uint32_t mask)
5684 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5687 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5688 uint32_t reg0, uint32_t reg1,
5689 uint32_t ref, uint32_t mask)
5691 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5692 struct amdgpu_device *adev = ring->adev;
5693 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5694 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5697 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5700 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5704 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5706 struct amdgpu_device *adev = ring->adev;
5709 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5710 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5711 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5712 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5713 WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5716 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5717 enum amdgpu_interrupt_state state)
5720 case AMDGPU_IRQ_STATE_DISABLE:
5721 case AMDGPU_IRQ_STATE_ENABLE:
5722 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5723 TIME_STAMP_INT_ENABLE,
5724 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5731 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5733 enum amdgpu_interrupt_state state)
5735 u32 mec_int_cntl, mec_int_cntl_reg;
5738 * amdgpu controls only the first MEC. That's why this function only
5739 * handles the setting of interrupts for this specific MEC. All other
5740 * pipes' interrupts are set by amdkfd.
5746 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5749 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5752 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5755 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5758 DRM_DEBUG("invalid pipe %d\n", pipe);
5762 DRM_DEBUG("invalid me %d\n", me);
5767 case AMDGPU_IRQ_STATE_DISABLE:
5768 mec_int_cntl = RREG32(mec_int_cntl_reg);
5769 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5770 TIME_STAMP_INT_ENABLE, 0);
5771 WREG32(mec_int_cntl_reg, mec_int_cntl);
5773 case AMDGPU_IRQ_STATE_ENABLE:
5774 mec_int_cntl = RREG32(mec_int_cntl_reg);
5775 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5776 TIME_STAMP_INT_ENABLE, 1);
5777 WREG32(mec_int_cntl_reg, mec_int_cntl);
5784 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5785 struct amdgpu_irq_src *source,
5787 enum amdgpu_interrupt_state state)
5790 case AMDGPU_IRQ_STATE_DISABLE:
5791 case AMDGPU_IRQ_STATE_ENABLE:
5792 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5793 PRIV_REG_INT_ENABLE,
5794 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5803 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5804 struct amdgpu_irq_src *source,
5806 enum amdgpu_interrupt_state state)
5809 case AMDGPU_IRQ_STATE_DISABLE:
5810 case AMDGPU_IRQ_STATE_ENABLE:
5811 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5812 PRIV_INSTR_INT_ENABLE,
5813 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
/* Toggle the CP ECC-error interrupt enable for a given ME/pipe INT_CNTL reg. */
#define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
			CP_ECC_ERROR_INT_ENABLE, 1)

#define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
			CP_ECC_ERROR_INT_ENABLE, 0)
5830 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5831 struct amdgpu_irq_src *source,
5833 enum amdgpu_interrupt_state state)
5836 case AMDGPU_IRQ_STATE_DISABLE:
5837 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5838 CP_ECC_ERROR_INT_ENABLE, 0);
5839 DISABLE_ECC_ON_ME_PIPE(1, 0);
5840 DISABLE_ECC_ON_ME_PIPE(1, 1);
5841 DISABLE_ECC_ON_ME_PIPE(1, 2);
5842 DISABLE_ECC_ON_ME_PIPE(1, 3);
5845 case AMDGPU_IRQ_STATE_ENABLE:
5846 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5847 CP_ECC_ERROR_INT_ENABLE, 1);
5848 ENABLE_ECC_ON_ME_PIPE(1, 0);
5849 ENABLE_ECC_ON_ME_PIPE(1, 1);
5850 ENABLE_ECC_ON_ME_PIPE(1, 2);
5851 ENABLE_ECC_ON_ME_PIPE(1, 3);
5861 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5862 struct amdgpu_irq_src *src,
5864 enum amdgpu_interrupt_state state)
5867 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5868 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5870 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5871 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5873 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5874 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5876 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5877 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5879 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5880 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5882 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5883 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5885 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5886 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5888 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5889 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5891 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5892 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5900 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5901 struct amdgpu_irq_src *source,
5902 struct amdgpu_iv_entry *entry)
5905 u8 me_id, pipe_id, queue_id;
5906 struct amdgpu_ring *ring;
5908 DRM_DEBUG("IH: CP EOP\n");
5909 me_id = (entry->ring_id & 0x0c) >> 2;
5910 pipe_id = (entry->ring_id & 0x03) >> 0;
5911 queue_id = (entry->ring_id & 0x70) >> 4;
5915 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5919 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5920 ring = &adev->gfx.compute_ring[i];
5921 /* Per-queue interrupt is supported for MEC starting from VI.
5922 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5924 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5925 amdgpu_fence_process(ring);
5932 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5933 struct amdgpu_iv_entry *entry)
5935 u8 me_id, pipe_id, queue_id;
5936 struct amdgpu_ring *ring;
5939 me_id = (entry->ring_id & 0x0c) >> 2;
5940 pipe_id = (entry->ring_id & 0x03) >> 0;
5941 queue_id = (entry->ring_id & 0x70) >> 4;
5945 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5949 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5950 ring = &adev->gfx.compute_ring[i];
5951 if (ring->me == me_id && ring->pipe == pipe_id &&
5952 ring->queue == queue_id)
5953 drm_sched_fault(&ring->sched);
/* IRQ handler for illegal register accesses from the command stream. */
static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}
/* IRQ handler for illegal instructions in the command stream. */
static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}
5978 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5979 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5980 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5981 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5983 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5984 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5985 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5987 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5988 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5991 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5992 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5995 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5996 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5997 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5999 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6000 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6003 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6004 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6005 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6007 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6008 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6009 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6011 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6012 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6015 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6016 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6019 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6020 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6023 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6024 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6025 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6027 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6028 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6031 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6032 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6033 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6035 { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6036 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6037 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6038 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6040 { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6041 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6042 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6045 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6046 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6047 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6048 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6050 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6051 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6052 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6053 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6055 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6056 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6057 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6058 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6060 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6061 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6062 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6063 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6065 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6066 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6069 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6070 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6071 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6073 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6074 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6077 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6078 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6081 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6082 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6085 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6086 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6089 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6090 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6093 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6094 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6097 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6098 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6099 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6101 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6102 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6103 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6105 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6106 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6107 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6109 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6110 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6111 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6113 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6114 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6115 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6117 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6118 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6121 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6122 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6125 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6126 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6129 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6130 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6133 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6134 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6137 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6138 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6141 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6142 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6145 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6146 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6149 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6150 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6153 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6154 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6157 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6158 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6161 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6162 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6165 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6166 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6169 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6170 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6173 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6174 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6175 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6177 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6178 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6179 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6181 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6182 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6185 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6186 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6189 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6190 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6193 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6194 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6195 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6197 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6198 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6199 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6201 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6202 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6203 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6205 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6206 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6207 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6209 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6210 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6213 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6214 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6215 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6217 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6218 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6219 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6221 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6222 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6223 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6225 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6226 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6227 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6229 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6230 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6231 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6233 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6234 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6235 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6237 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6238 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6239 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6241 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6242 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6243 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6245 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6246 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6247 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6249 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6250 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6251 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6253 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6254 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6255 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6257 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6258 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6259 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6261 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6262 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6263 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6265 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6266 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6267 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6269 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6270 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6271 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6273 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6274 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6275 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6277 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6278 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6279 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6281 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6282 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6285 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6286 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6289 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6290 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6293 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6294 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6297 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6298 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6301 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6302 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6303 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6305 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6306 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6307 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6309 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6310 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6311 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6313 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6314 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6315 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6317 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6318 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6319 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6321 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6322 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6325 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6326 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6329 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6330 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6333 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6334 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6337 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6338 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6341 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6342 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6343 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6345 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6346 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6347 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6349 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6350 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6351 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6353 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6354 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6355 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6357 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6358 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6359 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6361 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6362 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6365 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6366 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6369 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6370 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6373 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6374 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6377 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6378 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6381 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6382 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6383 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6385 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6386 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6387 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6389 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6390 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6391 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6393 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6394 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6397 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6398 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6401 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6402 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6405 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6406 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6409 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6410 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6413 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6414 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6419 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6422 struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6424 struct ta_ras_trigger_error_input block_info = { 0 };
6426 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6429 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6432 if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6435 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6437 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6438 ras_gfx_subblocks[info->head.sub_block_index].name,
6443 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6445 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6446 ras_gfx_subblocks[info->head.sub_block_index].name,
6451 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6452 block_info.sub_block_index =
6453 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6454 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6455 block_info.address = info->address;
6456 block_info.value = info->value;
6458 mutex_lock(&adev->grbm_idx_mutex);
6459 ret = psp_ras_trigger_error(&adev->psp, &block_info);
6460 mutex_unlock(&adev->grbm_idx_mutex);
/* Names of the VML2 bank-cache memory instances, indexed by ECC_INDEX. */
static const char *vml2_mems[] = {
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
};
/* Names of the VML2 page-walker memory instances, indexed by ECC_INDEX. */
static const char *vml2_walker_mems[] = {
	"UTC_VML2_CACHE_PDE0_MEM0",
	"UTC_VML2_CACHE_PDE0_MEM1",
	"UTC_VML2_CACHE_PDE1_MEM0",
	"UTC_VML2_CACHE_PDE1_MEM1",
	"UTC_VML2_CACHE_PDE2_MEM0",
	"UTC_VML2_CACHE_PDE2_MEM1",
	"UTC_VML2_RDIF_LOG_FIFO",
};
/* Names of the ATC L2 2M-cache memory instances, indexed by EDC_INDEX. */
static const char *atc_l2_cache_2m_mems[] = {
	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
};
/* Names of the ATC L2 4K-cache memory instances, indexed by EDC_INDEX. */
static const char *atc_l2_cache_4k_mems[] = {
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
};
/*
 * gfx_v9_0_query_utc_edc_status - collect UTC (L2 translation cache) EDC counts
 *
 * Reads the SEC/DED error counters of the VML2, VML2 walker and ATC L2
 * (2M and 4K) memories, logs any non-zero counts and accumulates them into
 * @err_data (ce_count for correctable SEC, ue_count for uncorrectable DED).
 */
static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
				     struct ras_err_data *err_data)
	uint32_t sec_count, ded_count;

	/* Select the broadcast index (255) and zero every counter first. */
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);

	/* VML2 memories: per-instance SEC and DED counter fields. */
	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);

		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"SEC %d\n", i, vml2_mems[i], sec_count);
			err_data->ce_count += sec_count;

		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"DED %d\n", i, vml2_mems[i], ded_count);
			err_data->ue_count += ded_count;

	/* VML2 page-walker memories: same SEC/DED decoding. */
	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);

		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
			err_data->ce_count += sec_count;

		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"DED %d\n", i, vml2_walker_mems[i], ded_count);
			err_data->ue_count += ded_count;

	/* ATC L2 2M cache: SEC field lives in bits 14:13 (mask 0x6000). */
	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);

		sec_count = (data & 0x00006000L) >> 0xd;
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
			err_data->ce_count += sec_count;

	/* ATC L2 4K cache: SEC in bits 14:13, DED in bits 16:15 (mask 0x18000). */
	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);

		sec_count = (data & 0x00006000L) >> 0xd;
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
			err_data->ce_count += sec_count;

		ded_count = (data & 0x00018000L) >> 0xf;
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"DED %d\n", i, atc_l2_cache_4k_mems[i],
			err_data->ue_count += ded_count;

	/* Leave the index registers back in broadcast mode. */
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
/*
 * gfx_v9_0_ras_error_count - decode one EDC counter register value
 *
 * Scans gfx_v9_0_ras_fields for every field description that matches @reg
 * (same offset/segment/instance), extracts its SEC and DED counts from the
 * raw register @value, logs non-zero counts for the given @se_id/@inst_id
 * and accumulates them into *@sec_count and *@ded_count.
 */
static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
	const struct soc15_reg_entry *reg,
	uint32_t se_id, uint32_t inst_id, uint32_t value,
	uint32_t *sec_count, uint32_t *ded_count)
	uint32_t sec_cnt, ded_cnt;

	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
		/* Skip field entries that describe a different register. */
		if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
			gfx_v9_0_ras_fields[i].seg != reg->seg ||
			gfx_v9_0_ras_fields[i].inst != reg->inst)

		/* Correctable (SEC) count for this sub-block. */
			gfx_v9_0_ras_fields[i].sec_count_mask) >>
			gfx_v9_0_ras_fields[i].sec_count_shift;
			dev_info(adev->dev, "GFX SubBlock %s, "
				"Instance[%d][%d], SEC %d\n",
				gfx_v9_0_ras_fields[i].name,
			*sec_count += sec_cnt;

		/* Uncorrectable (DED) count for this sub-block. */
			gfx_v9_0_ras_fields[i].ded_count_mask) >>
			gfx_v9_0_ras_fields[i].ded_count_shift;
			dev_info(adev->dev, "GFX SubBlock %s, "
				"Instance[%d][%d], DED %d\n",
				gfx_v9_0_ras_fields[i].name,
			*ded_count += ded_cnt;
/*
 * gfx_v9_0_reset_ras_error_count - clear all GFX EDC/ECC error counters
 *
 * Reads back every EDC counter register over its SE/instance space (the
 * read-back clears the counter) and zeroes the UTC counters explicitly,
 * so the next query starts from a clean state.  No-op when GFX RAS is
 * not supported.
 */
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))

	/* read back registers to clear the counters */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
				/* Target one SE/instance, then read its counter. */
				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));

	/* Restore broadcast GRBM index (all SEs/SHs/instances). */
	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Zero the UTC counters through the broadcast index (255). */
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);

	/* Read each instance once; the read-back clears the counter. */
	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);

	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);

	/* Leave the index registers in broadcast mode. */
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
/*
 * gfx_v9_0_query_ras_error_count - query all GFX RAS error counters
 *
 * Walks every EDC counter register over its SE/instance space under the
 * GRBM index lock, decodes the counts via gfx_v9_0_ras_error_count(),
 * then folds in the UTC counters.  Totals are returned through
 * @ras_error_status, which callers pass as a struct ras_err_data.
 */
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
		void *ras_error_status)
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
	uint32_t sec_count = 0, ded_count = 0;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))

	/* Start from zero; everything below accumulates. */
	err_data->ue_count = 0;
	err_data->ce_count = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
				/* Target one SE/instance, read and decode its counter. */
				gfx_v9_0_select_se_sh(adev, j, 0, k);
				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
				gfx_v9_0_ras_error_count(adev,
					&gfx_v9_0_edc_counter_regs[i],
					&sec_count, &ded_count);

	err_data->ce_count += sec_count;
	err_data->ue_count += ded_count;

	/* Restore broadcast selection before dropping the lock. */
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Add the UTC (VML2/ATC L2) EDC counters as well. */
	gfx_v9_0_query_utc_edc_status(adev, err_data);
6772 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6774 const unsigned int cp_coher_cntl =
6775 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6776 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6777 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6778 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6779 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6781 /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
6782 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6783 amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6784 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
6785 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
6786 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6787 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
6788 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6791 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6792 uint32_t pipe, bool enable)
6794 struct amdgpu_device *adev = ring->adev;
6796 uint32_t wcl_cs_reg;
6798 /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6799 val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6803 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6806 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6809 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6812 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6815 DRM_DEBUG("invalid pipe %d\n", pipe);
6819 amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6822 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6824 struct amdgpu_device *adev = ring->adev;
6829 /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6830 * number of gfx waves. Setting 5 bit will make sure gfx only gets
6831 * around 25% of gpu resources.
6833 val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6834 amdgpu_ring_emit_wreg(ring,
6835 SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6838 /* Restrict waves for normal/low priority compute queues as well
6839 * to get best QoS for high priority compute jobs.
6841 * amdgpu controls only 1st ME(0-3 CS pipes).
6843 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6844 if (i != ring->pipe)
6845 gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
/* Common IP-block lifecycle callbacks for the GFX v9 engine. */
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
/* Ring callbacks for the GFX (graphics) ring on the GFX hub. */
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		7 + /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		8 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
		       prior to this double SWITCH_BUFFER */
		8 + 8 + /* FENCE x2 */
		2 + /* SWITCH_BUFFER */
		7, /* gfx_v9_0_emit_mem_sync */
	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
/* Ring callbacks for the compute (MEC) rings, including wave limiting. */
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
		7 + /* gfx_v9_0_emit_mem_sync */
		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
/* Ring callbacks for the KIQ (kernel interface queue) ring; no IB/VM emit. */
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6989 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6993 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6995 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6996 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6998 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6999 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
/* EOP (end-of-pipe) interrupt source: ring fence/IB completion. */
static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,

/* Privileged register access fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,

/* Privileged instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,

/* CP ECC error interrupt source, handled by the common GFX helper. */
static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = amdgpu_gfx_cp_ecc_error_irq,
7023 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7025 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7026 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7028 adev->gfx.priv_reg_irq.num_types = 1;
7029 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7031 adev->gfx.priv_inst_irq.num_types = 1;
7032 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7034 adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
7035 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
/* Select the RLC function table; all listed GFX v9 variants share one. */
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 4, 1):
	case IP_VERSION(9, 3, 0):
	case IP_VERSION(9, 4, 2):
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
/* Initialize GDS (Global Data Share) size and wave-id limits per ASIC. */
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
	/* init asic gds info */
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
		adev->gds.gds_size = 0x10000;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 4, 1):
		adev->gds.gds_size = 0x1000;
	case IP_VERSION(9, 4, 2):
		/* aldebaran removed all the GDS internal memory,
		 * only support GWS opcode in kernel, like barrier
		 */
		adev->gds.gds_size = 0;
		/* default size for unlisted chips */
		adev->gds.gds_size = 0x10000;

	/* Highest compute wave id allowed to use GDS, per ASIC. */
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 4, 0):
		adev->gds.gds_compute_max_wave_id = 0x7ff;
	case IP_VERSION(9, 2, 1):
		adev->gds.gds_compute_max_wave_id = 0x27f;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
	case IP_VERSION(9, 4, 1):
		adev->gds.gds_compute_max_wave_id = 0xfff;
	case IP_VERSION(9, 4, 2):
		/* deprecated for Aldebaran, no usage at all */
		adev->gds.gds_compute_max_wave_id = 0;
	/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;

	/* GWS and OA sizes are the same across all gfx9 parts. */
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
/* Program the user-requested inactive-CU bitmap into
 * GC_USER_SHADER_ARRAY_CONFIG for the currently selected SE/SH. */
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
	/* Shift the bitmap into the INACTIVE_CUS field and clamp to its mask. */
	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7127 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7131 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7132 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7134 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7135 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7137 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7139 return (~data) & mask;
/*
 * gfx_v9_0_get_cu_info - gather per-SE/SH compute-unit bitmaps and totals
 *
 * Iterates every SE/SH under the GRBM index lock, applies the user-disable
 * masks, reads the hardware active-CU bitmap, and fills @cu_info with the
 * per-SH bitmaps, the always-on CU mask and the total active CU count.
 */
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)

	/*
	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
	 */
	if (adev->gfx.config.max_shader_engines *
		adev->gfx.config.max_sh_per_se > 16)

	/* Parse the module-parameter CU-disable masks, one per SE/SH. */
	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			/* Target this SE/SH, apply its disable mask, read HW bitmap. */
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
			 * 4x4 size array, and it's usually suitable for Vega
			 * ASICs which has 4*2 SE/SH layout.
			 * But for Arcturus, SE/SH layout is changed to 8*1.
			 * To mostly reduce the impact, we make it compatible
			 * with current bitmap array as below:
			 *    SE4,SH0 --> bitmap[0][1]
			 *    SE5,SH0 --> bitmap[1][1]
			 *    SE6,SH0 --> bitmap[2][1]
			 *    SE7,SH0 --> bitmap[3][1]
			 */
			cu_info->bitmap[i % 4][j + i / 4] = bitmap;

			/* Count active CUs in this SH, capped at max_cu_per_sh. */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
			active_cu_number += counter;
				/* Pack the per-SH always-on mask into the global mask. */
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
	/* Restore broadcast selection, then publish the totals. */
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7212 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7214 .type = AMD_IP_BLOCK_TYPE_GFX,
7218 .funcs = &gfx_v9_0_ip_funcs,