/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "gfx_v9_4.h"
#include "gfx_v9_0.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

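/*
 * The firmware images declared below are fetched at init time with
 * request_firmware(); they are normally installed as
 * /lib/firmware/amdgpu/<chip>_<block>.bin (the exact lookup path depends
 * on the distribution's firmware packaging).
 */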
MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

#define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0

enum ta_ras_gfx_subblock {
        /*CPC*/
        TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
        TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
        TA_RAS_BLOCK__GFX_CPC_UCODE,
        TA_RAS_BLOCK__GFX_DC_STATE_ME1,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
        TA_RAS_BLOCK__GFX_DC_STATE_ME2,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        /* CPF*/
        TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
        TA_RAS_BLOCK__GFX_CPF_TAG,
        TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
        /* CPG*/
        TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
        TA_RAS_BLOCK__GFX_CPG_TAG,
        TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
        /* GDS*/
        TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        /* SPI*/
        TA_RAS_BLOCK__GFX_SPI_SR_MEM,
        /* SQ*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_LDS_D,
        TA_RAS_BLOCK__GFX_SQ_LDS_I,
        TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
        /* SQC (3 ranges)*/
        TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        /* SQC range 0*/
        TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
                TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        /* SQC range 1*/
        TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        /* SQC range 2*/
        TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
        /* TA*/
        TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
        TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        /* TCA*/
        TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        /* TCC (5 sub-ranges)*/
        TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        /* TCC range 0*/
        TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
        TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        /* TCC range 1*/
        TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
                TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        /* TCC range 2*/
        TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
        TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
        TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
                TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        /* TCC range 3*/
        TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
                TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        /* TCC range 4*/
        TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
                TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
                TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
        /* TCI*/
        TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
        /* TCP*/
        TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
        TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
        TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
        TA_RAS_BLOCK__GFX_TCP_DB_RAM,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        /* TD*/
        TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
        TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        /* EA (3 sub-ranges)*/
        TA_RAS_BLOCK__GFX_EA_INDEX_START,
        /* EA range 0*/
        TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        /* EA range 1*/
        TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        /* EA range 2*/
        TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
        /* UTC VM L2 bank*/
        TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
        /* UTC VM walker*/
        TA_RAS_BLOCK__UTC_VML2_WALKER,
        /* UTC ATC L2 2MB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
        /* UTC ATC L2 4KB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
        TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
        unsigned char *name;
        int ta_subblock;
        int hw_supported_error_type;
        int sw_supported_error_type;
};

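/*
 * Pack the per-subblock error-type capability flags for the table below.
 * Arguments a..d feed hw_supported_error_type and e..h feed
 * sw_supported_error_type; the bit positions appear to follow the
 * amdgpu_ras error-type encoding (parity, single-bit correctable,
 * multi-bit uncorrectable, poison). Treat that mapping as an educated
 * reading of the macro rather than a documented ABI.
 */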
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
        [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
                #subblock,                                                     \
                TA_RAS_BLOCK__##subblock,                                      \
                ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
                (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
        }

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

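/*
 * "Golden" register settings: each SOC15_REG_GOLDEN_VALUE entry is
 * (ip, instance, register, and_mask, or_value).
 * soc15_program_register_sequence() effectively does a read-modify-write,
 * clearing the bits covered by and_mask and OR-ing in or_value, so only
 * the masked fields are touched; the rest of the register is preserved.
 */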
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
        {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
        {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

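/*
 * Offsets of the eight RLC SRM index control address/data registers,
 * expressed relative to instance 0 so they can be indexed as a
 * contiguous array (entry i selects register i).
 */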
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

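/*
 * Write a GC register through the RLCG interface (used under SR-IOV,
 * where direct MMIO access to some GC registers is blocked).
 * GRBM_GFX_CNTL and GRBM_GFX_INDEX are mirrored into scratch registers
 * and then written directly; everything else goes through a
 * scratch-register handshake: the value lands in SCRATCH_REG0, the
 * offset (with bit 31 set as a "pending" flag) in SCRATCH_REG1, and
 * RLC_SPARE_INT kicks the RLC firmware, which clears bit 31 once the
 * write has been performed.
 */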
static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
{
        static void *scratch_reg0;
        static void *scratch_reg1;
        static void *scratch_reg2;
        static void *scratch_reg3;
        static void *spare_int;
        static uint32_t grbm_cntl;
        static uint32_t grbm_idx;

        scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
        scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
        scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
        scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
        spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;

        grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
        grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;

        if (amdgpu_sriov_runtime(adev)) {
                pr_err("shouldn't call rlcg write register during runtime\n");
                return;
        }

        if (offset == grbm_cntl || offset == grbm_idx) {
                if (offset == grbm_cntl)
                        writel(v, scratch_reg2);
                else if (offset == grbm_idx)
                        writel(v, scratch_reg3);

                writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
        } else {
                uint32_t i = 0;
                uint32_t retries = 50000;

                writel(v, scratch_reg0);
                writel(offset | 0x80000000, scratch_reg1);
                writel(1, spare_int);
                for (i = 0; i < retries; i++) {
                        u32 tmp;

                        tmp = readl(scratch_reg1);
                        if (!(tmp & 0x80000000))
                                break;

                        udelay(10);
                }
                if (i >= retries)
                        pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset);
        }
}

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
                                          void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
                                     void *inject_if);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);

static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
                                uint64_t queue_mask)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring,
                PACKET3_SET_RESOURCES_VMID_MASK(0) |
                /* vmid_mask:0, queue_type:0 (KIQ) */
                PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
        amdgpu_ring_write(kiq_ring,
                        lower_32_bits(queue_mask));     /* queue mask lo */
        amdgpu_ring_write(kiq_ring,
                        upper_32_bits(queue_mask));     /* queue mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}

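/*
 * Ask the KIQ to map a compute queue: the MAP_QUEUES packet carries the
 * queue/pipe/ME selection, the doorbell offset, the GPU address of the
 * MQD and the address the firmware should use for the write pointer.
 */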
static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
                                 struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = kiq_ring->adev;
        uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
        uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
        /* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
        amdgpu_ring_write(kiq_ring,
                         PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
                         PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
                         PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                         PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                         PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
                         /* queue_type: normal compute queue */
                         PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
                         /* alloc format: all_on_one_pipe */
                         PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
                         PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
                         /* num_queues: must be 1 */
                         PACKET3_MAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
        amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   enum amdgpu_unmap_queues_action action,
                                   u64 gpu_addr, u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
        amdgpu_ring_write(kiq_ring, /* action, queue_sel: 0, eng_sel, num_queues: 1 */
                          PACKET3_UNMAP_QUEUES_ACTION(action) |
                          PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
                          PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
                          PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

        if (action == PREEMPT_QUEUES_NO_UNMAP) {
                amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, seq);
        } else {
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
        }
}

static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   u64 addr,
                                   u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
        amdgpu_ring_write(kiq_ring,
                          PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
                          PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
                          PACKET3_QUERY_STATUS_COMMAND(2));
        /* doorbell offset and engine select for the queue being queried */
        amdgpu_ring_write(kiq_ring,
                        PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
                        PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
        amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
        amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

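/*
 * Have the KIQ invalidate GPU TLB entries for a PASID. DST_SEL(1)
 * appears to target the TLB-invalidation path; all_hub selects whether
 * both the GFX and MM hubs are flushed.
 */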
static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
                                uint16_t pasid, uint32_t flush_type,
                                bool all_hub)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
                        PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
                        PACKET3_INVALIDATE_TLBS_PASID(pasid) |
                        PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

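/*
 * PM4 helpers used by the common KIQ code; the *_size fields give the
 * number of ring dwords each helper emits (header plus payload) so
 * callers can reserve ring space up front.
 */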
917 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
918         .kiq_set_resources = gfx_v9_0_kiq_set_resources,
919         .kiq_map_queues = gfx_v9_0_kiq_map_queues,
920         .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
921         .kiq_query_status = gfx_v9_0_kiq_query_status,
922         .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
923         .set_resources_size = 8,
924         .map_queues_size = 7,
925         .unmap_queues_size = 6,
926         .query_status_size = 7,
927         .invalidate_tlbs_size = 2,
928 };
929
930 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
931 {
932         adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
933 }
934
935 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
936 {
937         switch (adev->asic_type) {
938         case CHIP_VEGA10:
939                 soc15_program_register_sequence(adev,
940                                                 golden_settings_gc_9_0,
941                                                 ARRAY_SIZE(golden_settings_gc_9_0));
942                 soc15_program_register_sequence(adev,
943                                                 golden_settings_gc_9_0_vg10,
944                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
945                 break;
946         case CHIP_VEGA12:
947                 soc15_program_register_sequence(adev,
948                                                 golden_settings_gc_9_2_1,
949                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
950                 soc15_program_register_sequence(adev,
951                                                 golden_settings_gc_9_2_1_vg12,
952                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
953                 break;
954         case CHIP_VEGA20:
955                 soc15_program_register_sequence(adev,
956                                                 golden_settings_gc_9_0,
957                                                 ARRAY_SIZE(golden_settings_gc_9_0));
958                 soc15_program_register_sequence(adev,
959                                                 golden_settings_gc_9_0_vg20,
960                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
961                 break;
962         case CHIP_ARCTURUS:
963                 soc15_program_register_sequence(adev,
964                                                 golden_settings_gc_9_4_1_arct,
965                                                 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
966                 break;
967         case CHIP_RAVEN:
968                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
969                                                 ARRAY_SIZE(golden_settings_gc_9_1));
970                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
971                         soc15_program_register_sequence(adev,
972                                                         golden_settings_gc_9_1_rv2,
973                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
974                 else
975                         soc15_program_register_sequence(adev,
976                                                         golden_settings_gc_9_1_rv1,
977                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
978                 break;
979          case CHIP_RENOIR:
980                 soc15_program_register_sequence(adev,
981                                                 golden_settings_gc_9_1_rn,
982                                                 ARRAY_SIZE(golden_settings_gc_9_1_rn));
983                 return; /* for renoir, don't need common goldensetting */
984         default:
985                 break;
986         }
987
988         if (adev->asic_type != CHIP_ARCTURUS)
989                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
990                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
991 }
992
993 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
994 {
995         adev->gfx.scratch.num_reg = 8;
996         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
997         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
998 }
999
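     /*
      * Emit a PACKET3_WRITE_DATA that writes @val to register @reg: a
      * header dword, a control dword selecting the engine (@eng_sel) and
      * a register destination, the register offset, an unused high-address
      * dword, and the value.  With @wc set the packet asks for a write
      * confirmation, so following packets run only after the write lands.
      */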
1000 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1001                                        bool wc, uint32_t reg, uint32_t val)
1002 {
1003         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1004         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1005                                 WRITE_DATA_DST_SEL(0) |
1006                                 (wc ? WR_CONFIRM : 0));
1007         amdgpu_ring_write(ring, reg);
1008         amdgpu_ring_write(ring, 0);
1009         amdgpu_ring_write(ring, val);
1010 }
1011
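     /*
      * Emit a PACKET3_WAIT_REG_MEM: the selected engine stalls until the
      * dword at a register (@mem_space == 0) or memory address
      * (@mem_space == 1, must be dword aligned) ANDed with @mask equals
      * @ref; @inv is the poll interval and the compare function is
      * hard-coded to 3 ("equal").
      */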
1012 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1013                                   int mem_space, int opt, uint32_t addr0,
1014                                   uint32_t addr1, uint32_t ref, uint32_t mask,
1015                                   uint32_t inv)
1016 {
1017         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1018         amdgpu_ring_write(ring,
1019                                  /* memory (1) or register (0) */
1020                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1021                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
1022                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1023                                  WAIT_REG_MEM_ENGINE(eng_sel)));
1024
1025         if (mem_space)
1026                 BUG_ON(addr0 & 0x3); /* Dword align */
1027         amdgpu_ring_write(ring, addr0);
1028         amdgpu_ring_write(ring, addr1);
1029         amdgpu_ring_write(ring, ref);
1030         amdgpu_ring_write(ring, mask);
1031         amdgpu_ring_write(ring, inv); /* poll interval */
1032 }
1033
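     /*
      * Ring-level sanity test: seed a scratch register with 0xCAFEDEAD,
      * ask the CP to write 0xDEADBEEF to it via SET_UCONFIG_REG, then
      * poll for up to adev->usec_timeout microseconds for it to land.
      */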
1034 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1035 {
1036         struct amdgpu_device *adev = ring->adev;
1037         uint32_t scratch;
1038         uint32_t tmp = 0;
1039         unsigned i;
1040         int r;
1041
1042         r = amdgpu_gfx_scratch_get(adev, &scratch);
1043         if (r)
1044                 return r;
1045
1046         WREG32(scratch, 0xCAFEDEAD);
1047         r = amdgpu_ring_alloc(ring, 3);
1048         if (r)
1049                 goto error_free_scratch;
1050
1051         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1052         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1053         amdgpu_ring_write(ring, 0xDEADBEEF);
1054         amdgpu_ring_commit(ring);
1055
1056         for (i = 0; i < adev->usec_timeout; i++) {
1057                 tmp = RREG32(scratch);
1058                 if (tmp == 0xDEADBEEF)
1059                         break;
1060                 udelay(1);
1061         }
1062
1063         if (i >= adev->usec_timeout)
1064                 r = -ETIMEDOUT;
1065
1066 error_free_scratch:
1067         amdgpu_gfx_scratch_free(adev, scratch);
1068         return r;
1069 }
1070
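     /*
      * Same check as the ring test but through an indirect buffer: the IB
      * writes 0xDEADBEEF into a writeback slot with WRITE_DATA and the
      * fence wait confirms the whole IB submission path works.
      */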
1071 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1072 {
1073         struct amdgpu_device *adev = ring->adev;
1074         struct amdgpu_ib ib;
1075         struct dma_fence *f = NULL;
1077         unsigned index;
1078         uint64_t gpu_addr;
1079         uint32_t tmp;
1080         long r;
1081
1082         r = amdgpu_device_wb_get(adev, &index);
1083         if (r)
1084                 return r;
1085
1086         gpu_addr = adev->wb.gpu_addr + (index * 4);
1087         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1088         memset(&ib, 0, sizeof(ib));
1089         r = amdgpu_ib_get(adev, NULL, 16,
1090                           AMDGPU_IB_POOL_DIRECT, &ib);
1091         if (r)
1092                 goto err1;
1093
1094         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1095         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1096         ib.ptr[2] = lower_32_bits(gpu_addr);
1097         ib.ptr[3] = upper_32_bits(gpu_addr);
1098         ib.ptr[4] = 0xDEADBEEF;
1099         ib.length_dw = 5;
1100
1101         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1102         if (r)
1103                 goto err2;
1104
1105         r = dma_fence_wait_timeout(f, false, timeout);
1106         if (r == 0) {
1107                 r = -ETIMEDOUT;
1108                 goto err2;
1109         } else if (r < 0) {
1110                 goto err2;
1111         }
1112
1113         tmp = adev->wb.wb[index];
1114         if (tmp == 0xDEADBEEF)
1115                 r = 0;
1116         else
1117                 r = -EINVAL;
1118
1119 err2:
1120         amdgpu_ib_free(adev, &ib, NULL);
1121         dma_fence_put(f);
1122 err1:
1123         amdgpu_device_wb_free(adev, index);
1124         return r;
1125 }
1126
1128 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1129 {
1130         release_firmware(adev->gfx.pfp_fw);
1131         adev->gfx.pfp_fw = NULL;
1132         release_firmware(adev->gfx.me_fw);
1133         adev->gfx.me_fw = NULL;
1134         release_firmware(adev->gfx.ce_fw);
1135         adev->gfx.ce_fw = NULL;
1136         release_firmware(adev->gfx.rlc_fw);
1137         adev->gfx.rlc_fw = NULL;
1138         release_firmware(adev->gfx.mec_fw);
1139         adev->gfx.mec_fw = NULL;
1140         release_firmware(adev->gfx.mec2_fw);
1141         adev->gfx.mec2_fw = NULL;
1142
1143         kfree(adev->gfx.rlc.register_list_format);
1144 }
1145
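     /*
      * Parse the v2.1 extensions of the RLC firmware header: the
      * save/restore list CNTL, GPM and SRM blobs plus their ucode and
      * feature versions, all located via offsets stored in the header.
      */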
1146 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1147 {
1148         const struct rlc_firmware_header_v2_1 *rlc_hdr;
1149
1150         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1151         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1152         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1153         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1154         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1155         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1156         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1157         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1158         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1159         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1160         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1161         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1162         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1163         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1164                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1165 }
1166
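     /*
      * Decide from the per-ASIC minimum CP firmware versions below whether
      * the ME/MEC firmware is new enough for the write-wait register path
      * (me/mec_fw_write_wait), and warn once when the firmware is too old.
      */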
1167 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1168 {
1169         adev->gfx.me_fw_write_wait = false;
1170         adev->gfx.mec_fw_write_wait = false;
1171
1172         if ((adev->asic_type != CHIP_ARCTURUS) &&
1173             ((adev->gfx.mec_fw_version < 0x000001a5) ||
1174             (adev->gfx.mec_feature_version < 46) ||
1175             (adev->gfx.pfp_fw_version < 0x000000b7) ||
1176             (adev->gfx.pfp_feature_version < 46)))
1177                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1178
1179         switch (adev->asic_type) {
1180         case CHIP_VEGA10:
1181                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1182                     (adev->gfx.me_feature_version >= 42) &&
1183                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1184                     (adev->gfx.pfp_feature_version >= 42))
1185                         adev->gfx.me_fw_write_wait = true;
1186
1187                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1188                     (adev->gfx.mec_feature_version >= 42))
1189                         adev->gfx.mec_fw_write_wait = true;
1190                 break;
1191         case CHIP_VEGA12:
1192                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1193                     (adev->gfx.me_feature_version >= 44) &&
1194                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1195                     (adev->gfx.pfp_feature_version >= 44))
1196                         adev->gfx.me_fw_write_wait = true;
1197
1198                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1199                     (adev->gfx.mec_feature_version >= 44))
1200                         adev->gfx.mec_fw_write_wait = true;
1201                 break;
1202         case CHIP_VEGA20:
1203                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1204                     (adev->gfx.me_feature_version >= 44) &&
1205                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1206                     (adev->gfx.pfp_feature_version >= 44))
1207                         adev->gfx.me_fw_write_wait = true;
1208
1209                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1210                     (adev->gfx.mec_feature_version >= 44))
1211                         adev->gfx.mec_fw_write_wait = true;
1212                 break;
1213         case CHIP_RAVEN:
1214                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1215                     (adev->gfx.me_feature_version >= 42) &&
1216                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1217                     (adev->gfx.pfp_feature_version >= 42))
1218                         adev->gfx.me_fw_write_wait = true;
1219
1220                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1221                     (adev->gfx.mec_feature_version >= 42))
1222                         adev->gfx.mec_fw_write_wait = true;
1223                 break;
1224         default:
1225                 adev->gfx.me_fw_write_wait = true;
1226                 adev->gfx.mec_fw_write_wait = true;
1227                 break;
1228         }
1229 }
1230
1231 struct amdgpu_gfxoff_quirk {
1232         u16 chip_vendor;
1233         u16 chip_device;
1234         u16 subsys_vendor;
1235         u16 subsys_device;
1236         u8 revision;
1237 };
1238
1239 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1240         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1241         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1242         /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1243         { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1244         /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1245         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1246         { 0, 0, 0, 0, 0 },
1247 };
1248
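     /*
      * Compare the device's PCI vendor/device/subsystem IDs and revision
      * against the quirk list above; a match means GFXOFF is known to be
      * unstable on that board and should be disabled.
      */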
1249 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1250 {
1251         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1252
1253         while (p && p->chip_device != 0) {
1254                 if (pdev->vendor == p->chip_vendor &&
1255                     pdev->device == p->chip_device &&
1256                     pdev->subsystem_vendor == p->subsys_vendor &&
1257                     pdev->subsystem_device == p->subsys_device &&
1258                     pdev->revision == p->revision) {
1259                         return true;
1260                 }
1261                 ++p;
1262         }
1263         return false;
1264 }
1265
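     /*
      * SMU firmware 0x41e2b or newer identifies a "kicker" Raven part; the
      * same threshold selects the kicker RLC image in
      * gfx_v9_0_init_rlc_microcode().
      */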
1266 static bool is_raven_kicker(struct amdgpu_device *adev)
1267 {
1268         return adev->pm.fw_version >= 0x41e2b;
1272 }
1273
1274 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1275 {
1276         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1277                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1278
1279         switch (adev->asic_type) {
1280         case CHIP_VEGA10:
1281         case CHIP_VEGA12:
1282         case CHIP_VEGA20:
1283                 break;
1284         case CHIP_RAVEN:
1285                 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1286                       (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1287                     ((!is_raven_kicker(adev) &&
1288                       adev->gfx.rlc_fw_version < 531) ||
1289                      (adev->gfx.rlc_feature_version < 1) ||
1290                      !adev->gfx.rlc.is_rlc_v2_1))
1291                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1292
1293                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1294                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1295                                 AMD_PG_SUPPORT_CP |
1296                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1297                 break;
1298         case CHIP_RENOIR:
1299                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1300                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1301                                 AMD_PG_SUPPORT_CP |
1302                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1303                 break;
1304         default:
1305                 break;
1306         }
1307 }
1308
1309 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1310                                           const char *chip_name)
1311 {
1312         char fw_name[30];
1313         int err;
1314         struct amdgpu_firmware_info *info = NULL;
1315         const struct common_firmware_header *header = NULL;
1316         const struct gfx_firmware_header_v1_0 *cp_hdr;
1317
1318         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1319         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1320         if (err)
1321                 goto out;
1322         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1323         if (err)
1324                 goto out;
1325         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1326         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1327         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1328
1329         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1330         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1331         if (err)
1332                 goto out;
1333         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1334         if (err)
1335                 goto out;
1336         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1337         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1338         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1339
1340         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1341         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1342         if (err)
1343                 goto out;
1344         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1345         if (err)
1346                 goto out;
1347         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1348         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1349         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1350
1351         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1352                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1353                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1354                 info->fw = adev->gfx.pfp_fw;
1355                 header = (const struct common_firmware_header *)info->fw->data;
1356                 adev->firmware.fw_size +=
1357                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1358
1359                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1360                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1361                 info->fw = adev->gfx.me_fw;
1362                 header = (const struct common_firmware_header *)info->fw->data;
1363                 adev->firmware.fw_size +=
1364                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1365
1366                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1367                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1368                 info->fw = adev->gfx.ce_fw;
1369                 header = (const struct common_firmware_header *)info->fw->data;
1370                 adev->firmware.fw_size +=
1371                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1372         }
1373
1374 out:
1375         if (err) {
1376                 dev_err(adev->dev,
1377                         "gfx9: Failed to load firmware \"%s\"\n",
1378                         fw_name);
1379                 release_firmware(adev->gfx.pfp_fw);
1380                 adev->gfx.pfp_fw = NULL;
1381                 release_firmware(adev->gfx.me_fw);
1382                 adev->gfx.me_fw = NULL;
1383                 release_firmware(adev->gfx.ce_fw);
1384                 adev->gfx.ce_fw = NULL;
1385         }
1386         return err;
1387 }
1388
1389 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1390                                           const char *chip_name)
1391 {
1392         char fw_name[30];
1393         int err;
1394         struct amdgpu_firmware_info *info = NULL;
1395         const struct common_firmware_header *header = NULL;
1396         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1397         unsigned int *tmp = NULL;
1398         unsigned int i = 0;
1399         uint16_t version_major;
1400         uint16_t version_minor;
1401         uint32_t smu_version;
1402
1403         /*
1404          * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1405          * instead of picasso_rlc.bin.
1406          * Identification:
1407          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1408          *          or revision >= 0xD8 && revision <= 0xDF
1409          * otherwise the part is PCO FP5
1410          */
1411         if (!strcmp(chip_name, "picasso") &&
1412                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1413                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1414                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1415         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1416                 (smu_version >= 0x41e2b))
1417                 /*
1418                  * SMC is loaded by the SBIOS on APUs; the SMU version can be queried directly.
1419                  */
1420                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1421         else
1422                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1423         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1424         if (err)
1425                 goto out;
1426         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
             if (err)
                     goto out;
1427         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1428
1429         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1430         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1431         if (version_major == 2 && version_minor == 1)
1432                 adev->gfx.rlc.is_rlc_v2_1 = true;
1433
1434         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1435         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1436         adev->gfx.rlc.save_and_restore_offset =
1437                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1438         adev->gfx.rlc.clear_state_descriptor_offset =
1439                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1440         adev->gfx.rlc.avail_scratch_ram_locations =
1441                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1442         adev->gfx.rlc.reg_restore_list_size =
1443                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1444         adev->gfx.rlc.reg_list_format_start =
1445                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1446         adev->gfx.rlc.reg_list_format_separate_start =
1447                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1448         adev->gfx.rlc.starting_offsets_start =
1449                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1450         adev->gfx.rlc.reg_list_format_size_bytes =
1451                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1452         adev->gfx.rlc.reg_list_size_bytes =
1453                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1454         adev->gfx.rlc.register_list_format =
1455                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1456                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1457         if (!adev->gfx.rlc.register_list_format) {
1458                 err = -ENOMEM;
1459                 goto out;
1460         }
1461
1462         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1463                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1464         for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1465                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1466
1467         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1468
1469         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1470                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1471         for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1472                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1473
1474         if (adev->gfx.rlc.is_rlc_v2_1)
1475                 gfx_v9_0_init_rlc_ext_microcode(adev);
1476
1477         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1478                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1479                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1480                 info->fw = adev->gfx.rlc_fw;
1481                 header = (const struct common_firmware_header *)info->fw->data;
1482                 adev->firmware.fw_size +=
1483                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1484
1485                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1486                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1487                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1488                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1489                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1490                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1491                         info->fw = adev->gfx.rlc_fw;
1492                         adev->firmware.fw_size +=
1493                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1494
1495                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1496                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1497                         info->fw = adev->gfx.rlc_fw;
1498                         adev->firmware.fw_size +=
1499                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1500
1501                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1502                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1503                         info->fw = adev->gfx.rlc_fw;
1504                         adev->firmware.fw_size +=
1505                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1506                 }
1507         }
1508
1509 out:
1510         if (err) {
1511                 dev_err(adev->dev,
1512                         "gfx9: Failed to load firmware \"%s\"\n",
1513                         fw_name);
1514                 release_firmware(adev->gfx.rlc_fw);
1515                 adev->gfx.rlc_fw = NULL;
1516         }
1517         return err;
1518 }
1519
1520 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1521                                           const char *chip_name)
1522 {
1523         char fw_name[30];
1524         int err;
1525         struct amdgpu_firmware_info *info = NULL;
1526         const struct common_firmware_header *header = NULL;
1527         const struct gfx_firmware_header_v1_0 *cp_hdr;
1528
1529         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1530         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1531         if (err)
1532                 goto out;
1533         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1534         if (err)
1535                 goto out;
1536         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1537         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1538         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1539
1541         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1542         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1543         if (!err) {
1544                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1545                 if (err)
1546                         goto out;
1547                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1548                         adev->gfx.mec2_fw->data;
1549                 adev->gfx.mec2_fw_version =
1550                         le32_to_cpu(cp_hdr->header.ucode_version);
1551                 adev->gfx.mec2_feature_version =
1552                         le32_to_cpu(cp_hdr->ucode_feature_version);
1553         } else {
1554                 err = 0;
1555                 adev->gfx.mec2_fw = NULL;
1556         }
1557
1558         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1559                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1560                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1561                 info->fw = adev->gfx.mec_fw;
1562                 header = (const struct common_firmware_header *)info->fw->data;
1563                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1564                 adev->firmware.fw_size +=
1565                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1566
1567                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1568                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1569                 info->fw = adev->gfx.mec_fw;
1570                 adev->firmware.fw_size +=
1571                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1572
1573                 if (adev->gfx.mec2_fw) {
1574                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1575                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1576                         info->fw = adev->gfx.mec2_fw;
1577                         header = (const struct common_firmware_header *)info->fw->data;
1578                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1579                         adev->firmware.fw_size +=
1580                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1581
1582                         /* TODO: Determine if MEC2 JT FW loading can be
1583                          * removed for all GFX v9 ASICs and above */
1584                         if (adev->asic_type != CHIP_ARCTURUS &&
1585                             adev->asic_type != CHIP_RENOIR) {
1586                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1587                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1588                                 info->fw = adev->gfx.mec2_fw;
1589                                 adev->firmware.fw_size +=
1590                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1591                                         PAGE_SIZE);
1592                         }
1593                 }
1594         }
1595
1596 out:
1597         gfx_v9_0_check_if_need_gfxoff(adev);
1598         gfx_v9_0_check_fw_write_wait(adev);
1599         if (err) {
1600                 dev_err(adev->dev,
1601                         "gfx9: Failed to load firmware \"%s\"\n",
1602                         fw_name);
1603                 release_firmware(adev->gfx.mec_fw);
1604                 adev->gfx.mec_fw = NULL;
1605                 release_firmware(adev->gfx.mec2_fw);
1606                 adev->gfx.mec2_fw = NULL;
1607         }
1608         return err;
1609 }
1610
1611 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1612 {
1613         const char *chip_name;
1614         int r;
1615
1616         DRM_DEBUG("\n");
1617
1618         switch (adev->asic_type) {
1619         case CHIP_VEGA10:
1620                 chip_name = "vega10";
1621                 break;
1622         case CHIP_VEGA12:
1623                 chip_name = "vega12";
1624                 break;
1625         case CHIP_VEGA20:
1626                 chip_name = "vega20";
1627                 break;
1628         case CHIP_RAVEN:
1629                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1630                         chip_name = "raven2";
1631                 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1632                         chip_name = "picasso";
1633                 else
1634                         chip_name = "raven";
1635                 break;
1636         case CHIP_ARCTURUS:
1637                 chip_name = "arcturus";
1638                 break;
1639         case CHIP_RENOIR:
1640                 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1641                         chip_name = "renoir";
1642                 else
1643                         chip_name = "green_sardine";
1644                 break;
1645         default:
1646                 BUG();
1647         }
1648
1649         /* No CPG in Arcturus */
1650         if (adev->asic_type != CHIP_ARCTURUS) {
1651                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1652                 if (r)
1653                         return r;
1654         }
1655
1656         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1657         if (r)
1658                 return r;
1659
1660         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1661         if (r)
1662                 return r;
1663
1664         return 0;
1665 }
1666
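     /*
      * Clear-state buffer size in dwords: 2 (preamble begin) +
      * 3 (context control) + (2 + reg_count) per SECT_CONTEXT extent +
      * 2 (preamble end) + 2 (clear state).
      */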
1667 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1668 {
1669         u32 count = 0;
1670         const struct cs_section_def *sect = NULL;
1671         const struct cs_extent_def *ext = NULL;
1672
1673         /* begin clear state */
1674         count += 2;
1675         /* context control state */
1676         count += 3;
1677
1678         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1679                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1680                         if (sect->id == SECT_CONTEXT)
1681                                 count += 2 + ext->reg_count;
1682                         else
1683                                 return 0;
1684                 }
1685         }
1686
1687         /* end clear state */
1688         count += 2;
1689         /* clear state */
1690         count += 2;
1691
1692         return count;
1693 }
1694
1695 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1696                                     volatile u32 *buffer)
1697 {
1698         u32 count = 0, i;
1699         const struct cs_section_def *sect = NULL;
1700         const struct cs_extent_def *ext = NULL;
1701
1702         if (adev->gfx.rlc.cs_data == NULL)
1703                 return;
1704         if (buffer == NULL)
1705                 return;
1706
1707         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1708         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1709
1710         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1711         buffer[count++] = cpu_to_le32(0x80000000);
1712         buffer[count++] = cpu_to_le32(0x80000000);
1713
1714         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1715                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1716                         if (sect->id == SECT_CONTEXT) {
1717                                 buffer[count++] =
1718                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1719                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1720                                                 PACKET3_SET_CONTEXT_REG_START);
1721                                 for (i = 0; i < ext->reg_count; i++)
1722                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1723                         } else {
1724                                 return;
1725                         }
1726                 }
1727         }
1728
1729         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1730         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1731
1732         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1733         buffer[count++] = cpu_to_le32(0);
1734 }
1735
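     /*
      * Program the per-SE/SH always-on CU masks: the first two CUs of each
      * SH go into RLC_PG_ALWAYS_ON_CU_MASK, while the first 4 (APU),
      * 8 (Vega12) or 12 (other dGPUs) CUs form the load-balancer
      * always-active mask, which is also cached in cu_info->ao_cu_bitmap.
      */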
1736 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1737 {
1738         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1739         uint32_t pg_always_on_cu_num = 2;
1740         uint32_t always_on_cu_num;
1741         uint32_t i, j, k;
1742         uint32_t mask, cu_bitmap, counter;
1743
1744         if (adev->flags & AMD_IS_APU)
1745                 always_on_cu_num = 4;
1746         else if (adev->asic_type == CHIP_VEGA12)
1747                 always_on_cu_num = 8;
1748         else
1749                 always_on_cu_num = 12;
1750
1751         mutex_lock(&adev->grbm_idx_mutex);
1752         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1753                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1754                         mask = 1;
1755                         cu_bitmap = 0;
1756                         counter = 0;
1757                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1758
1759                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1760                                 if (cu_info->bitmap[i][j] & mask) {
1761                                         if (counter == pg_always_on_cu_num)
1762                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1763                                         if (counter < always_on_cu_num)
1764                                                 cu_bitmap |= mask;
1765                                         else
1766                                                 break;
1767                                         counter++;
1768                                 }
1769                                 mask <<= 1;
1770                         }
1771
1772                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1773                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1774                 }
1775         }
1776         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1777         mutex_unlock(&adev->grbm_idx_mutex);
1778 }
1779
1780 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1781 {
1782         uint32_t data;
1783
1784         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1785         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1786         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1787         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1788         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1789
1790         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1791         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1792
1793         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1794         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1795
1796         mutex_lock(&adev->grbm_idx_mutex);
1797         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1798         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1799         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1800
1801         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1802         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1803         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1804         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1805         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1806
1807         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1808         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1809         data &= 0x0000FFFF;
1810         data |= 0x00C00000;
1811         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1812
1813         /*
1814          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1815          * programmed in gfx_v9_0_init_always_on_cu_mask()
1816          */
1817
1818         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1819          * but used here for RLC_LB_CNTL configuration */
1820         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1821         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1822         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1823         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1824         mutex_unlock(&adev->grbm_idx_mutex);
1825
1826         gfx_v9_0_init_always_on_cu_mask(adev);
1827 }
1828
1829 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1830 {
1831         uint32_t data;
1832
1833         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1834         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1835         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1836         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1837         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1838
1839         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1840         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1841
1842         /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1843         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1844
1845         mutex_lock(&adev->grbm_idx_mutex);
1846         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1847         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1848         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1849
1850         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1851         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1852         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1853         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1854         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1855
1856         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1857         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1858         data &= 0x0000FFFF;
1859         data |= 0x00C00000;
1860         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1861
1862         /*
1863          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1864          * programmed in gfx_v9_0_init_always_on_cu_mask()
1865          */
1866
1867         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1868          * but used here for RLC_LB_CNTL configuration */
1869         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1870         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1871         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1872         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1873         mutex_unlock(&adev->grbm_idx_mutex);
1874
1875         gfx_v9_0_init_always_on_cu_mask(adev);
1876 }
1877
1878 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1879 {
1880         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1881 }
1882
1883 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1884 {
1885         return 5;
1886 }
1887
1888 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1889 {
1890         const struct cs_section_def *cs_data;
1891         int r;
1892
1893         adev->gfx.rlc.cs_data = gfx9_cs_data;
1894
1895         cs_data = adev->gfx.rlc.cs_data;
1896
1897         if (cs_data) {
1898                 /* init clear state block */
1899                 r = amdgpu_gfx_rlc_init_csb(adev);
1900                 if (r)
1901                         return r;
1902         }
1903
1904         if (adev->flags & AMD_IS_APU) {
1905                 /* TODO: double check the cp_table_size for RV */
1906                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1907                 r = amdgpu_gfx_rlc_init_cpt(adev);
1908                 if (r)
1909                         return r;
1910         }
1911
1912         switch (adev->asic_type) {
1913         case CHIP_RAVEN:
1914                 gfx_v9_0_init_lbpw(adev);
1915                 break;
1916         case CHIP_VEGA20:
1917                 gfx_v9_4_init_lbpw(adev);
1918                 break;
1919         default:
1920                 break;
1921         }
1922
1923         /* init spm vmid with 0xf */
1924         if (adev->gfx.rlc.funcs->update_spm_vmid)
1925                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1926
1927         return 0;
1928 }
1929
1930 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1931 {
1932         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1933         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1934 }
1935
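     /*
      * Set up MEC resources: one EOP buffer (HPD) in VRAM covering every
      * acquired compute queue, GFX9_MEC_HPD_SIZE bytes per ring, plus a
      * GTT BO holding a copy of the MEC microcode for the CP to fetch.
      */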
1936 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1937 {
1938         int r;
1939         u32 *hpd;
1940         const __le32 *fw_data;
1941         unsigned fw_size;
1942         u32 *fw;
1943         size_t mec_hpd_size;
1944
1945         const struct gfx_firmware_header_v1_0 *mec_hdr;
1946
1947         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1948
1949         /* take ownership of the relevant compute queues */
1950         amdgpu_gfx_compute_queue_acquire(adev);
1951         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1952         if (mec_hpd_size) {
1953                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1954                                               AMDGPU_GEM_DOMAIN_VRAM,
1955                                               &adev->gfx.mec.hpd_eop_obj,
1956                                               &adev->gfx.mec.hpd_eop_gpu_addr,
1957                                               (void **)&hpd);
1958                 if (r) {
1959                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1960                         gfx_v9_0_mec_fini(adev);
1961                         return r;
1962                 }
1963
1964                 memset(hpd, 0, mec_hpd_size);
1965
1966                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1967                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1968         }
1969
1970         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1971
1972         fw_data = (const __le32 *)
1973                 (adev->gfx.mec_fw->data +
1974                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1975         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1976
1977         r = amdgpu_bo_create_reserved(adev, fw_size,
1978                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1979                                       &adev->gfx.mec.mec_fw_obj,
1980                                       &adev->gfx.mec.mec_fw_gpu_addr,
1981                                       (void **)&fw);
1982         if (r) {
1983                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1984                 gfx_v9_0_mec_fini(adev);
1985                 return r;
1986         }
1987
1988         memcpy(fw, fw_data, fw_size);
1989
1990         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1991         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1992
1993         return 0;
1994 }
1995
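     /*
      * Wave state is read out indirectly: program SQ_IND_INDEX with the
      * target wave/SIMD and register index (FORCE_READ set) and read the
      * value back through SQ_IND_DATA; wave_read_regs() adds AUTO_INCR to
      * stream consecutive registers such as SGPRs and VGPRs.
      */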
1996 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1997 {
1998         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1999                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2000                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2001                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
2002                 (SQ_IND_INDEX__FORCE_READ_MASK));
2003         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2004 }
2005
2006 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2007                            uint32_t wave, uint32_t thread,
2008                            uint32_t regno, uint32_t num, uint32_t *out)
2009 {
2010         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2011                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2012                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2013                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
2014                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2015                 (SQ_IND_INDEX__FORCE_READ_MASK) |
2016                 (SQ_IND_INDEX__AUTO_INCR_MASK));
2017         while (num--)
2018                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2019 }
2020
2021 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2022 {
2023         /* type 1 wave data */
2024         dst[(*no_fields)++] = 1;
2025         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2026         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2027         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2028         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2029         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2030         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2031         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2032         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2033         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2034         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2035         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2036         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2037         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2038         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2039 }
2040
2041 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2042                                      uint32_t wave, uint32_t start,
2043                                      uint32_t size, uint32_t *dst)
2044 {
2045         wave_read_regs(
2046                 adev, simd, wave, 0,
2047                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2048 }
2049
2050 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2051                                      uint32_t wave, uint32_t thread,
2052                                      uint32_t start, uint32_t size,
2053                                      uint32_t *dst)
2054 {
2055         wave_read_regs(
2056                 adev, simd, wave, thread,
2057                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2058 }
2059
2060 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2061                                   u32 me, u32 pipe, u32 q, u32 vm)
2062 {
2063         soc15_grbm_select(adev, me, pipe, q, vm);
2064 }
2065
2066 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2067         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2068         .select_se_sh = &gfx_v9_0_select_se_sh,
2069         .read_wave_data = &gfx_v9_0_read_wave_data,
2070         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2071         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2072         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2073         .ras_error_inject = &gfx_v9_0_ras_error_inject,
2074         .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2075         .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2076 };
2077
2078 static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
2079         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2080         .select_se_sh = &gfx_v9_0_select_se_sh,
2081         .read_wave_data = &gfx_v9_0_read_wave_data,
2082         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2083         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2084         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2085         .ras_error_inject = &gfx_v9_4_ras_error_inject,
2086         .query_ras_error_count = &gfx_v9_4_query_ras_error_count,
2087         .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
2088         .query_ras_error_status = &gfx_v9_4_query_ras_error_status,
2089 };
2090
2091 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2092 {
2093         u32 gb_addr_config;
2094         int err;
2095
2096         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2097
2098         switch (adev->asic_type) {
2099         case CHIP_VEGA10:
2100                 adev->gfx.config.max_hw_contexts = 8;
2101                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2102                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2103                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2104                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2105                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2106                 break;
2107         case CHIP_VEGA12:
2108                 adev->gfx.config.max_hw_contexts = 8;
2109                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2110                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2111                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2112                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2113                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2114                 DRM_INFO("fix gfx.config for vega12\n");
2115                 break;
2116         case CHIP_VEGA20:
2117                 adev->gfx.config.max_hw_contexts = 8;
2118                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2119                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2120                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2121                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2122                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2123                 gb_addr_config &= ~0xf3e777ff;
2124                 gb_addr_config |= 0x22014042;
2125                 /* check vbios table if gpu info is not available */
2126                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2127                 if (err)
2128                         return err;
2129                 break;
2130         case CHIP_RAVEN:
2131                 adev->gfx.config.max_hw_contexts = 8;
2132                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2133                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2134                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2135                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2136                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2137                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2138                 else
2139                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2140                 break;
2141         case CHIP_ARCTURUS:
2142                 adev->gfx.funcs = &gfx_v9_4_gfx_funcs;
2143                 adev->gfx.config.max_hw_contexts = 8;
2144                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2145                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2146                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2147                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2148                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2149                 gb_addr_config &= ~0xf3e777ff;
2150                 gb_addr_config |= 0x22014042;
2151                 break;
2152         case CHIP_RENOIR:
2153                 adev->gfx.config.max_hw_contexts = 8;
2154                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2155                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2156                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2157                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2158                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2159                 gb_addr_config &= ~0xf3e777ff;
2160                 gb_addr_config |= 0x22010042;
2161                 break;
2162         default:
2163                 BUG();
2164                 break;
2165         }
2166
2167         adev->gfx.config.gb_addr_config = gb_addr_config;
2168
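             /*
              * The GB_ADDR_CONFIG fields are log2 encoded, so each decoded
              * value is 1 << field; PIPE_INTERLEAVE_SIZE carries an extra
              * +8 bias, i.e. a field value of 0 means 256 bytes.
              */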
2169         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2170                         REG_GET_FIELD(
2171                                         adev->gfx.config.gb_addr_config,
2172                                         GB_ADDR_CONFIG,
2173                                         NUM_PIPES);
2174
2175         adev->gfx.config.max_tile_pipes =
2176                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2177
2178         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2179                         REG_GET_FIELD(
2180                                         adev->gfx.config.gb_addr_config,
2181                                         GB_ADDR_CONFIG,
2182                                         NUM_BANKS);
2183         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2184                         REG_GET_FIELD(
2185                                         adev->gfx.config.gb_addr_config,
2186                                         GB_ADDR_CONFIG,
2187                                         MAX_COMPRESSED_FRAGS);
2188         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2189                         REG_GET_FIELD(
2190                                         adev->gfx.config.gb_addr_config,
2191                                         GB_ADDR_CONFIG,
2192                                         NUM_RB_PER_SE);
2193         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2194                         REG_GET_FIELD(
2195                                         adev->gfx.config.gb_addr_config,
2196                                         GB_ADDR_CONFIG,
2197                                         NUM_SHADER_ENGINES);
2198         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2199                         REG_GET_FIELD(
2200                                         adev->gfx.config.gb_addr_config,
2201                                         GB_ADDR_CONFIG,
2202                                         PIPE_INTERLEAVE_SIZE));
2203
2204         return 0;
2205 }
2206
2207 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2208                                       int mec, int pipe, int queue)
2209 {
2210         unsigned irq_type;
2211         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2212         unsigned int hw_prio;
2213
2216         /* mec0 is me1 */
2217         ring->me = mec + 1;
2218         ring->pipe = pipe;
2219         ring->queue = queue;
2220
2221         ring->ring_obj = NULL;
2222         ring->use_doorbell = true;
2223         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
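        /* doorbell slots are assigned in 64-bit units, while
         * ring->doorbell_index is in dword units, hence the << 1 */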
2224         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2225                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2226         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2227
2228         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2229                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2230                 + ring->pipe;
2231         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ?
2232                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2233         /* type-2 packets are deprecated on MEC, use type-3 instead */
2234         return amdgpu_ring_init(adev, ring, 1024,
2235                                 &adev->gfx.eop_irq, irq_type, hw_prio);
2236 }
2237
2238 static int gfx_v9_0_sw_init(void *handle)
2239 {
2240         int i, j, k, r, ring_id;
2241         struct amdgpu_ring *ring;
2242         struct amdgpu_kiq *kiq;
2243         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2244
2245         switch (adev->asic_type) {
2246         case CHIP_VEGA10:
2247         case CHIP_VEGA12:
2248         case CHIP_VEGA20:
2249         case CHIP_RAVEN:
2250         case CHIP_ARCTURUS:
2251         case CHIP_RENOIR:
2252                 adev->gfx.mec.num_mec = 2;
2253                 break;
2254         default:
2255                 adev->gfx.mec.num_mec = 1;
2256                 break;
2257         }
2258
2259         adev->gfx.mec.num_pipe_per_mec = 4;
2260         adev->gfx.mec.num_queue_per_pipe = 8;
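        /* 4 pipes x 8 queues gives up to 32 hardware queues per MEC */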
2261
2262         /* EOP Event */
2263         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2264         if (r)
2265                 return r;
2266
2267         /* Privileged reg */
2268         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2269                               &adev->gfx.priv_reg_irq);
2270         if (r)
2271                 return r;
2272
2273         /* Privileged inst */
2274         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2275                               &adev->gfx.priv_inst_irq);
2276         if (r)
2277                 return r;
2278
2279         /* ECC error */
2280         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2281                               &adev->gfx.cp_ecc_error_irq);
2282         if (r)
2283                 return r;
2284
2285         /* FUE error */
2286         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2287                               &adev->gfx.cp_ecc_error_irq);
2288         if (r)
2289                 return r;
2290
2291         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2292
2293         gfx_v9_0_scratch_init(adev);
2294
2295         r = gfx_v9_0_init_microcode(adev);
2296         if (r) {
2297                 DRM_ERROR("Failed to load gfx firmware!\n");
2298                 return r;
2299         }
2300
2301         r = adev->gfx.rlc.funcs->init(adev);
2302         if (r) {
2303                 DRM_ERROR("Failed to init rlc BOs!\n");
2304                 return r;
2305         }
2306
2307         r = gfx_v9_0_mec_init(adev);
2308         if (r) {
2309                 DRM_ERROR("Failed to init MEC BOs!\n");
2310                 return r;
2311         }
2312
2313         /* set up the gfx ring */
2314         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2315                 ring = &adev->gfx.gfx_ring[i];
2316                 ring->ring_obj = NULL;
2317                 if (!i)
2318                         sprintf(ring->name, "gfx");
2319                 else
2320                         sprintf(ring->name, "gfx_%d", i);
2321                 ring->use_doorbell = true;
2322                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2323                 r = amdgpu_ring_init(adev, ring, 1024,
2324                                      &adev->gfx.eop_irq,
2325                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2326                                      AMDGPU_RING_PRIO_DEFAULT);
2327                 if (r)
2328                         return r;
2329         }
2330
2331         /* set up the compute queues - allocate horizontally across pipes */
2332         ring_id = 0;
2333         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2334                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2335                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2336                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2337                                         continue;
2338
2339                                 r = gfx_v9_0_compute_ring_init(adev,
2340                                                                ring_id,
2341                                                                i, k, j);
2342                                 if (r)
2343                                         return r;
2344
2345                                 ring_id++;
2346                         }
2347                 }
2348         }
2349
2350         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2351         if (r) {
2352                 DRM_ERROR("Failed to init KIQ BOs!\n");
2353                 return r;
2354         }
2355
2356         kiq = &adev->gfx.kiq;
2357         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2358         if (r)
2359                 return r;
2360
2361         /* create MQD for all compute queues as well as the KIQ for the SRIOV case */
2362         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2363         if (r)
2364                 return r;
2365
2366         adev->gfx.ce_ram_size = 0x8000;
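        /* 0x8000 bytes = 32 KB of constant engine (CE) RAM */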
2367
2368         r = gfx_v9_0_gpu_early_init(adev);
2369         if (r)
2370                 return r;
2371
2372         return 0;
2373 }
2374
2376 static int gfx_v9_0_sw_fini(void *handle)
2377 {
2378         int i;
2379         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2380
2381         amdgpu_gfx_ras_fini(adev);
2382
2383         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2384                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2385         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2386                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2387
2388         amdgpu_gfx_mqd_sw_fini(adev);
2389         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2390         amdgpu_gfx_kiq_fini(adev);
2391
2392         gfx_v9_0_mec_fini(adev);
2393         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2394         if (adev->flags & AMD_IS_APU) {
2395                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2396                                 &adev->gfx.rlc.cp_table_gpu_addr,
2397                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2398         }
2399         gfx_v9_0_free_microcode(adev);
2400
2401         return 0;
2402 }
2403
2405 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2406 {
2407         /* TODO */
2408 }
2409
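/* Select which SE/SH/instance subsequent GRBM-indexed register accesses
 * target; 0xffffffff for a parameter broadcasts to all instances of it. */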
2410 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2411                            u32 instance)
2412 {
2413         u32 data;
2414
2415         if (instance == 0xffffffff)
2416                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2417         else
2418                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2419
2420         if (se_num == 0xffffffff)
2421                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2422         else
2423                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2424
2425         if (sh_num == 0xffffffff)
2426                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2427         else
2428                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2429
2430         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2431 }
2432
2433 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2434 {
2435         u32 data, mask;
2436
2437         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2438         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2439
2440         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2441         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2442
2443         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2444                                          adev->gfx.config.max_sh_per_se);
2445
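        /* the registers hold "disabled" bits, so invert and mask to get
         * the bitmap of active render backends */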
2446         return (~data) & mask;
2447 }
2448
2449 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2450 {
2451         int i, j;
2452         u32 data;
2453         u32 active_rbs = 0;
2454         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2455                                         adev->gfx.config.max_sh_per_se;
2456
2457         mutex_lock(&adev->grbm_idx_mutex);
2458         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2459                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2460                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2461                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2462                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2463                                                rb_bitmap_width_per_sh);
2464                 }
2465         }
2466         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2467         mutex_unlock(&adev->grbm_idx_mutex);
2468
2469         adev->gfx.config.backend_enable_mask = active_rbs;
2470         adev->gfx.config.num_rbs = hweight32(active_rbs);
2471 }
2472
2473 #define DEFAULT_SH_MEM_BASES    (0x6000)
2474 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2475 {
2476         int i;
2477         uint32_t sh_mem_config;
2478         uint32_t sh_mem_bases;
2479
2480         /*
2481          * Configure apertures:
2482          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2483          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2484          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2485          */
2486         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
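        /* each 16-bit half of SH_MEM_BASES holds bits 63:48 of an aperture
         * base, so 0x6000 yields the 0x6000'... apertures listed above */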
2487
2488         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2489                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2490                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2491
2492         mutex_lock(&adev->srbm_mutex);
2493         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2494                 soc15_grbm_select(adev, 0, 0, 0, i);
2495                 /* CP and shaders */
2496                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2497                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2498         }
2499         soc15_grbm_select(adev, 0, 0, 0, 0);
2500         mutex_unlock(&adev->srbm_mutex);
2501
2502         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2503          * access. These should be enabled by FW for the target VMIDs. */
2504         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2505                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2506                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2507                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2508                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2509         }
2510 }
2511
2512 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2513 {
2514         int vmid;
2515
2516         /*
2517          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2518          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2519          * the driver can enable them for graphics. VMID0 should maintain
2520          * access so that HWS firmware can save/restore entries.
2521          */
2522         for (vmid = 1; vmid < 16; vmid++) {
2523                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2524                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2525                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2526                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2527         }
2528 }
2529
2530 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2531 {
2532         uint32_t tmp;
2533
2534         switch (adev->asic_type) {
2535         case CHIP_ARCTURUS:
2536                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2537                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2538                                         DISABLE_BARRIER_WAITCNT, 1);
2539                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2540                 break;
2541         default:
2542                 break;
2543         }
2544 }
2545
2546 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2547 {
2548         u32 tmp;
2549         int i;
2550
2551         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2552
2553         gfx_v9_0_tiling_mode_table_init(adev);
2554
2555         gfx_v9_0_setup_rb(adev);
2556         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2557         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2558
2559         /* XXX SH_MEM regs */
2560         /* where to put LDS, scratch, GPUVM in FSA64 space */
2561         mutex_lock(&adev->srbm_mutex);
2562         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2563                 soc15_grbm_select(adev, 0, 0, 0, i);
2564                 /* CP and shaders */
2565                 if (i == 0) {
2566                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2567                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2568                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2569                                             !!adev->gmc.noretry);
2570                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2571                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2572                 } else {
2573                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2574                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2575                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2576                                             !!adev->gmc.noretry);
2577                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2578                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2579                                 (adev->gmc.private_aperture_start >> 48));
2580                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2581                                 (adev->gmc.shared_aperture_start >> 48));
2582                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2583                 }
2584         }
2585         soc15_grbm_select(adev, 0, 0, 0, 0);
2586
2587         mutex_unlock(&adev->srbm_mutex);
2588
2589         gfx_v9_0_init_compute_vmid(adev);
2590         gfx_v9_0_init_gds_vmid(adev);
2591         gfx_v9_0_init_sq_config(adev);
2592 }
2593
2594 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2595 {
2596         u32 i, j, k;
2597         u32 mask;
2598
2599         mutex_lock(&adev->grbm_idx_mutex);
2600         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2601                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2602                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2603                         for (k = 0; k < adev->usec_timeout; k++) {
2604                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2605                                         break;
2606                                 udelay(1);
2607                         }
2608                         if (k == adev->usec_timeout) {
2609                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2610                                                       0xffffffff, 0xffffffff);
2611                                 mutex_unlock(&adev->grbm_idx_mutex);
2612                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2613                                          i, j);
2614                                 return;
2615                         }
2616                 }
2617         }
2618         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2619         mutex_unlock(&adev->grbm_idx_mutex);
2620
2621         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2622                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2623                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2624                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2625         for (k = 0; k < adev->usec_timeout; k++) {
2626                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2627                         break;
2628                 udelay(1);
2629         }
2630 }
2631
2632 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2633                                                bool enable)
2634 {
2635         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2636
2637         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2638         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2639         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2640         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2641
2642         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2643 }
2644
2645 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2646 {
2647         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2648         /* csib */
2649         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2650                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2651         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2652                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2653         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2654                         adev->gfx.rlc.clear_state_size);
2655 }
2656
2657 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2658                                 int indirect_offset,
2659                                 int list_size,
2660                                 int *unique_indirect_regs,
2661                                 int unique_indirect_reg_count,
2662                                 int *indirect_start_offsets,
2663                                 int *indirect_start_offsets_count,
2664                                 int max_start_offsets_count)
2665 {
2666         int idx;
2667
2668         for (; indirect_offset < list_size; indirect_offset++) {
2669                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2670                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2671                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2672
2673                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2674                         indirect_offset += 2;
2675
2676                         /* look for the matching index */
2677                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2678                                 if (unique_indirect_regs[idx] ==
2679                                         register_list_format[indirect_offset] ||
2680                                         !unique_indirect_regs[idx])
2681                                         break;
2682                         }
2683
2684                         BUG_ON(idx >= unique_indirect_reg_count);
2685
2686                         if (!unique_indirect_regs[idx])
2687                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2688
2689                         indirect_offset++;
2690                 }
2691         }
2692 }
2693
2694 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2695 {
2696         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2697         int unique_indirect_reg_count = 0;
2698
2699         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2700         int indirect_start_offsets_count = 0;
2701
2702         int list_size = 0;
2703         int i = 0, j = 0;
2704         u32 tmp = 0;
2705
2706         u32 *register_list_format =
2707                 kmemdup(adev->gfx.rlc.register_list_format,
2708                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2709         if (!register_list_format)
2710                 return -ENOMEM;
2711
2712         /* setup unique_indirect_regs array and indirect_start_offsets array */
2713         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2714         gfx_v9_1_parse_ind_reg_list(register_list_format,
2715                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2716                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2717                                     unique_indirect_regs,
2718                                     unique_indirect_reg_count,
2719                                     indirect_start_offsets,
2720                                     &indirect_start_offsets_count,
2721                                     ARRAY_SIZE(indirect_start_offsets));
2722
2723         /* enable address auto-increment in case it is disabled */
2724         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2725         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2726         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2727
2728         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2729         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2730                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2731         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2732                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2733                         adev->gfx.rlc.register_restore[i]);
2734
2735         /* load indirect register */
2736         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2737                 adev->gfx.rlc.reg_list_format_start);
2738
2739         /* direct register portion */
2740         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2741                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2742                         register_list_format[i]);
2743
2744         /* indirect register portion */
2745         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2746                 if (register_list_format[i] == 0xFFFFFFFF) {
2747                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2748                         continue;
2749                 }
2750
2751                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2752                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2753
2754                 for (j = 0; j < unique_indirect_reg_count; j++) {
2755                         if (register_list_format[i] == unique_indirect_regs[j]) {
2756                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2757                                 break;
2758                         }
2759                 }
2760
2761                 BUG_ON(j >= unique_indirect_reg_count);
2762
2763                 i++;
2764         }
2765
2766         /* set save/restore list size */
2767         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2768         list_size = list_size >> 1;
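        /* the restore list holds register offset/value pairs, so the entry
         * count written below is half the dword count */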
2769         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2770                 adev->gfx.rlc.reg_restore_list_size);
2771         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2772
2773         /* write the starting offsets to RLC scratch ram */
2774         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2775                 adev->gfx.rlc.starting_offsets_start);
2776         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2777                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2778                        indirect_start_offsets[i]);
2779
2780         /* load unique indirect regs */
2781         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2782                 if (unique_indirect_regs[i] != 0) {
2783                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2784                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2785                                unique_indirect_regs[i] & 0x3FFFF);
2786
2787                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2788                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2789                                unique_indirect_regs[i] >> 20);
2790                 }
2791         }
2792
2793         kfree(register_list_format);
2794         return 0;
2795 }
2796
2797 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2798 {
2799         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2800 }
2801
2802 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2803                                              bool enable)
2804 {
2805         uint32_t data = 0;
2806         uint32_t default_data = 0;
2807
2808         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2809         if (enable) {
2810                 /* enable GFXIP control over CGPG */
2811                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2812                 if (default_data != data)
2813                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2814
2815                 /* update status */
2816                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2817                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2818                 if (default_data != data)
2819                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2820         } else {
2821                 /* restore GFXIP control over CGPG */
2822                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2823                 if (default_data != data)
2824                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2825         }
2826 }
2827
2828 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2829 {
2830         uint32_t data = 0;
2831
2832         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2833                               AMD_PG_SUPPORT_GFX_SMG |
2834                               AMD_PG_SUPPORT_GFX_DMG)) {
2835                 /* init IDLE_POLL_COUNT = 60 */
2836                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2837                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2838                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2839                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2840
2841                 /* init RLC PG Delay */
2842                 data = 0;
2843                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2844                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2845                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2846                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2847                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2848
2849                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2850                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2851                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2852                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2853
2854                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2855                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2856                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2857                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2858
2859                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2860                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2861
2862                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2863                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2864                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2865                 if (adev->asic_type != CHIP_RENOIR)
2866                         pwr_10_0_gfxip_control_over_cgpg(adev, true);
2867         }
2868 }
2869
2870 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2871                                                 bool enable)
2872 {
2873         uint32_t data = 0;
2874         uint32_t default_data = 0;
2875
2876         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2877         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2878                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2879                              enable ? 1 : 0);
2880         if (default_data != data)
2881                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2882 }
2883
2884 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2885                                                 bool enable)
2886 {
2887         uint32_t data = 0;
2888         uint32_t default_data = 0;
2889
2890         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2891         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2892                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2893                              enable ? 1 : 0);
2894         if (default_data != data)
2895                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2896 }
2897
2898 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2899                                         bool enable)
2900 {
2901         uint32_t data = 0;
2902         uint32_t default_data = 0;
2903
2904         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
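        /* note the inverted sense: the field is CP_PG_DISABLE, so enabling
         * CP power gating means clearing it */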
2905         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2906                              CP_PG_DISABLE,
2907                              enable ? 0 : 1);
2908         if (default_data != data)
2909                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2910 }
2911
2912 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2913                                                 bool enable)
2914 {
2915         uint32_t data, default_data;
2916
2917         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2918         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2919                              GFX_POWER_GATING_ENABLE,
2920                              enable ? 1 : 0);
2921         if (default_data != data)
2922                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2923 }
2924
2925 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2926                                                 bool enable)
2927 {
2928         uint32_t data, default_data;
2929
2930         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2931         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2932                              GFX_PIPELINE_PG_ENABLE,
2933                              enable ? 1 : 0);
2934         if (default_data != data)
2935                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2936
2937         if (!enable)
2938                 /* read any GFX register to wake up GFX */
2939                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2940 }
2941
2942 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2943                                                        bool enable)
2944 {
2945         uint32_t data, default_data;
2946
2947         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2948         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2949                              STATIC_PER_CU_PG_ENABLE,
2950                              enable ? 1 : 0);
2951         if (default_data != data)
2952                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2953 }
2954
2955 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2956                                                 bool enable)
2957 {
2958         uint32_t data, default_data;
2959
2960         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2961         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2962                              DYN_PER_CU_PG_ENABLE,
2963                              enable ? 1 : 0);
2964         if (default_data != data)
2965                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2966 }
2967
2968 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2969 {
2970         gfx_v9_0_init_csb(adev);
2971
2972         /*
2973          * The RLC save/restore list is available since RLC v2_1,
2974          * and it's needed by the gfxoff feature.
2975          */
2976         if (adev->gfx.rlc.is_rlc_v2_1) {
2977                 if (adev->asic_type == CHIP_VEGA12 ||
2978                     (adev->apu_flags & AMD_APU_IS_RAVEN2))
2979                         gfx_v9_1_init_rlc_save_restore_list(adev);
2980                 gfx_v9_0_enable_save_restore_machine(adev);
2981         }
2982
2983         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2984                               AMD_PG_SUPPORT_GFX_SMG |
2985                               AMD_PG_SUPPORT_GFX_DMG |
2986                               AMD_PG_SUPPORT_CP |
2987                               AMD_PG_SUPPORT_GDS |
2988                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2989                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2990                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2991                 gfx_v9_0_init_gfx_power_gating(adev);
2992         }
2993 }
2994
2995 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2996 {
2997         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2998         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2999         gfx_v9_0_wait_for_rlc_serdes(adev);
3000 }
3001
3002 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3003 {
3004         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3005         udelay(50);
3006         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3007         udelay(50);
3008 }
3009
3010 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3011 {
3012 #ifdef AMDGPU_RLC_DEBUG_RETRY
3013         u32 rlc_ucode_ver;
3014 #endif
3015
3016         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3017         udelay(50);
3018
3019         /* APUs (e.g. carrizo) only enable the CP interrupt once the CP has been initialized */
3020         if (!(adev->flags & AMD_IS_APU)) {
3021                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3022                 udelay(50);
3023         }
3024
3025 #ifdef AMDGPU_RLC_DEBUG_RETRY
3026         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3027         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3028         if (rlc_ucode_ver == 0x108) {
3029                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3030                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3031                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3032                  * default is 0x9C4 to create a 100us interval */
3033                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3034                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3035                  * to disable the page fault retry interrupts, default is
3036                  * 0x100 (256) */
3037                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3038         }
3039 #endif
3040 }
3041
3042 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3043 {
3044         const struct rlc_firmware_header_v2_0 *hdr;
3045         const __le32 *fw_data;
3046         unsigned i, fw_size;
3047
3048         if (!adev->gfx.rlc_fw)
3049                 return -EINVAL;
3050
3051         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3052         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3053
3054         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3055                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3056         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3057
3058         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3059                         RLCG_UCODE_LOADING_START_ADDRESS);
3060         for (i = 0; i < fw_size; i++)
3061                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3062         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3063
3064         return 0;
3065 }
3066
3067 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3068 {
3069         int r;
3070
3071         if (amdgpu_sriov_vf(adev)) {
3072                 gfx_v9_0_init_csb(adev);
3073                 return 0;
3074         }
3075
3076         adev->gfx.rlc.funcs->stop(adev);
3077
3078         /* disable CG */
3079         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3080
3081         gfx_v9_0_init_pg(adev);
3082
3083         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3084                 /* legacy rlc firmware loading */
3085                 r = gfx_v9_0_rlc_load_microcode(adev);
3086                 if (r)
3087                         return r;
3088         }
3089
3090         switch (adev->asic_type) {
3091         case CHIP_RAVEN:
3092                 if (amdgpu_lbpw == 0)
3093                         gfx_v9_0_enable_lbpw(adev, false);
3094                 else
3095                         gfx_v9_0_enable_lbpw(adev, true);
3096                 break;
3097         case CHIP_VEGA20:
3098                 if (amdgpu_lbpw > 0)
3099                         gfx_v9_0_enable_lbpw(adev, true);
3100                 else
3101                         gfx_v9_0_enable_lbpw(adev, false);
3102                 break;
3103         default:
3104                 break;
3105         }
3106
3107         adev->gfx.rlc.funcs->start(adev);
3108
3109         return 0;
3110 }
3111
3112 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3113 {
3114         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3115
3116         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3117         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3118         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3119         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3120         udelay(50);
3121 }
3122
3123 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3124 {
3125         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3126         const struct gfx_firmware_header_v1_0 *ce_hdr;
3127         const struct gfx_firmware_header_v1_0 *me_hdr;
3128         const __le32 *fw_data;
3129         unsigned i, fw_size;
3130
3131         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3132                 return -EINVAL;
3133
3134         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3135                 adev->gfx.pfp_fw->data;
3136         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3137                 adev->gfx.ce_fw->data;
3138         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3139                 adev->gfx.me_fw->data;
3140
3141         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3142         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3143         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3144
3145         gfx_v9_0_cp_gfx_enable(adev, false);
3146
3147         /* PFP */
3148         fw_data = (const __le32 *)
3149                 (adev->gfx.pfp_fw->data +
3150                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3151         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3152         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3153         for (i = 0; i < fw_size; i++)
3154                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3155         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3156
3157         /* CE */
3158         fw_data = (const __le32 *)
3159                 (adev->gfx.ce_fw->data +
3160                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3161         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3162         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3163         for (i = 0; i < fw_size; i++)
3164                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3165         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3166
3167         /* ME */
3168         fw_data = (const __le32 *)
3169                 (adev->gfx.me_fw->data +
3170                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3171         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3172         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3173         for (i = 0; i < fw_size; i++)
3174                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3175         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3176
3177         return 0;
3178 }
3179
3180 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3181 {
3182         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3183         const struct cs_section_def *sect = NULL;
3184         const struct cs_extent_def *ext = NULL;
3185         int r, i, tmp;
3186
3187         /* init the CP */
3188         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3189         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3190
3191         gfx_v9_0_cp_gfx_enable(adev, true);
3192
3193         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3194         if (r) {
3195                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3196                 return r;
3197         }
3198
3199         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3200         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3201
3202         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3203         amdgpu_ring_write(ring, 0x80000000);
3204         amdgpu_ring_write(ring, 0x80000000);
3205
3206         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3207                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3208                         if (sect->id == SECT_CONTEXT) {
3209                                 amdgpu_ring_write(ring,
3210                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3211                                                ext->reg_count));
3212                                 amdgpu_ring_write(ring,
3213                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3214                                 for (i = 0; i < ext->reg_count; i++)
3215                                         amdgpu_ring_write(ring, ext->extent[i]);
3216                         }
3217                 }
3218         }
3219
3220         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3221         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3222
3223         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3224         amdgpu_ring_write(ring, 0);
3225
3226         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3227         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3228         amdgpu_ring_write(ring, 0x8000);
3229         amdgpu_ring_write(ring, 0x8000);
3230
3231         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3232         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3233                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3234         amdgpu_ring_write(ring, tmp);
3235         amdgpu_ring_write(ring, 0);
3236
3237         amdgpu_ring_commit(ring);
3238
3239         return 0;
3240 }
3241
3242 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3243 {
3244         struct amdgpu_ring *ring;
3245         u32 tmp;
3246         u32 rb_bufsz;
3247         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3248
3249         /* Set the write pointer delay */
3250         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3251
3252         /* set the RB to use vmid 0 */
3253         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3254
3255         /* Set ring buffer size */
3256         ring = &adev->gfx.gfx_ring[0];
3257         rb_bufsz = order_base_2(ring->ring_size / 8);
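        /* RB_BUFSZ is the log2 of the ring size in 8-byte (qword) units */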
3258         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3259         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3260 #ifdef __BIG_ENDIAN
3261         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3262 #endif
3263         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3264
3265         /* Initialize the ring buffer's write pointers */
3266         ring->wptr = 0;
3267         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3268         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3269
3270         /* set the wb address whether it's enabled or not */
3271         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3272         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3273         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3274
3275         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3276         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3277         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3278
3279         mdelay(1);
3280         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3281
3282         rb_addr = ring->gpu_addr >> 8;
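        /* CP_RB0_BASE takes the ring address in 256-byte units */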
3283         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3284         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3285
3286         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3287         if (ring->use_doorbell) {
3288                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3289                                     DOORBELL_OFFSET, ring->doorbell_index);
3290                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3291                                     DOORBELL_EN, 1);
3292         } else {
3293                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3294         }
3295         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3296
3297         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3298                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3299         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3300
3301         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3302                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3303
3305         /* start the ring */
3306         gfx_v9_0_cp_gfx_start(adev);
3307         ring->sched.ready = true;
3308
3309         return 0;
3310 }
3311
3312 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3313 {
3314         if (enable) {
3315                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3316         } else {
3317                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3318                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3319                 adev->gfx.kiq.ring.sched.ready = false;
3320         }
3321         udelay(50);
3322 }
3323
3324 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3325 {
3326         const struct gfx_firmware_header_v1_0 *mec_hdr;
3327         const __le32 *fw_data;
3328         unsigned i;
3329         u32 tmp;
3330
3331         if (!adev->gfx.mec_fw)
3332                 return -EINVAL;
3333
3334         gfx_v9_0_cp_compute_enable(adev, false);
3335
3336         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3337         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3338
3339         fw_data = (const __le32 *)
3340                 (adev->gfx.mec_fw->data +
3341                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3342         tmp = 0;
3343         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3344         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3345         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3346
3347         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3348                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3349         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3350                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3351
3352         /* MEC1 */
3353         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3354                          mec_hdr->jt_offset);
3355         for (i = 0; i < mec_hdr->jt_size; i++)
3356                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3357                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3358
3359         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3360                         adev->gfx.mec_fw_version);
3361         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3362
3363         return 0;
3364 }
3365
3366 /* KIQ functions */
3367 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3368 {
3369         uint32_t tmp;
3370         struct amdgpu_device *adev = ring->adev;
3371
3372         /* tell the RLC which queue is the KIQ */
3373         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3374         tmp &= 0xffffff00;
3375         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3376         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
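        /* the second write below additionally sets bit 7, which presumably
         * flags the queue as active once the me/pipe/queue bits are latched */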
3377         tmp |= 0x80;
3378         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3379 }
3380
3381 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3382 {
3383         struct amdgpu_device *adev = ring->adev;
3384
3385         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3386                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
3387                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3388                         mqd->cp_hqd_queue_priority =
3389                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3390                 }
3391         }
3392 }
3393
3394 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3395 {
3396         struct amdgpu_device *adev = ring->adev;
3397         struct v9_mqd *mqd = ring->mqd_ptr;
3398         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3399         uint32_t tmp;
3400
3401         mqd->header = 0xC0310800;
3402         mqd->compute_pipelinestat_enable = 0x00000001;
3403         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3404         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3405         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3406         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3407         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3408         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3409         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3410         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3411         mqd->compute_misc_reserved = 0x00000003;
3412
3413         mqd->dynamic_cu_mask_addr_lo =
3414                 lower_32_bits(ring->mqd_gpu_addr
3415                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3416         mqd->dynamic_cu_mask_addr_hi =
3417                 upper_32_bits(ring->mqd_gpu_addr
3418                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3419
3420         eop_base_addr = ring->eop_gpu_addr >> 8;
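        /* the EOP base address is likewise programmed in 256-byte units */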
3421         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3422         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3423
3424         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3425         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3426         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3427                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
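        /* e.g. GFX9_MEC_HPD_SIZE = 4096 bytes = 1024 dwords, so EOP_SIZE =
         * order_base_2(1024) - 1 = 9 and the buffer holds 2^(9+1) = 1024
         * dwords, matching the allocation */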
3428
3429         mqd->cp_hqd_eop_control = tmp;
3430
3431         /* enable doorbell? */
3432         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3433
3434         if (ring->use_doorbell) {
3435                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3436                                     DOORBELL_OFFSET, ring->doorbell_index);
3437                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3438                                     DOORBELL_EN, 1);
3439                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3440                                     DOORBELL_SOURCE, 0);
3441                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3442                                     DOORBELL_HIT, 0);
3443         } else {
3444                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3445                                          DOORBELL_EN, 0);
3446         }
3447
3448         mqd->cp_hqd_pq_doorbell_control = tmp;
3449
3450         /* disable the queue if it's active */
3451         ring->wptr = 0;
3452         mqd->cp_hqd_dequeue_request = 0;
3453         mqd->cp_hqd_pq_rptr = 0;
3454         mqd->cp_hqd_pq_wptr_lo = 0;
3455         mqd->cp_hqd_pq_wptr_hi = 0;
3456
3457         /* set the pointer to the MQD */
3458         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3459         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3460
3461         /* set MQD vmid to 0 */
3462         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3463         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3464         mqd->cp_mqd_control = tmp;
3465
3466         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3467         hqd_gpu_addr = ring->gpu_addr >> 8;
3468         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3469         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3470
3471         /* set up the HQD, this is similar to CP_RB0_CNTL */
3472         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3473         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3474                             (order_base_2(ring->ring_size / 4) - 1));
3475         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3476                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3477 #ifdef __BIG_ENDIAN
3478         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3479 #endif
3480         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3481         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3482         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3483         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3484         mqd->cp_hqd_pq_control = tmp;
3485
3486         /* set the wb address whether it's enabled or not */
3487         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3488         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3489         mqd->cp_hqd_pq_rptr_report_addr_hi =
3490                 upper_32_bits(wb_gpu_addr) & 0xffff;
3491
3492         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3493         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3494         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3495         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3496
3497         tmp = 0;
3498         /* enable the doorbell if requested */
3499         if (ring->use_doorbell) {
3500                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3501                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3502                                 DOORBELL_OFFSET, ring->doorbell_index);
3503
3504                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3505                                          DOORBELL_EN, 1);
3506                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3507                                          DOORBELL_SOURCE, 0);
3508                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3509                                          DOORBELL_HIT, 0);
3510         }
3511
3512         mqd->cp_hqd_pq_doorbell_control = tmp;
3513
3514         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3515         ring->wptr = 0;
3516         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3517
3518         /* set the vmid for the queue */
3519         mqd->cp_hqd_vmid = 0;
3520
3521         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3522         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3523         mqd->cp_hqd_persistent_state = tmp;
3524
3525         /* set MIN_IB_AVAIL_SIZE */
3526         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3527         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3528         mqd->cp_hqd_ib_control = tmp;
3529
3530         /* set static priority for a queue/ring */
3531         gfx_v9_0_mqd_set_priority(ring, mqd);
3532         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
3533
3534         /* the map_queues packet doesn't need to activate the queue,
3535          * so only the KIQ needs to set this field.
3536          */
3537         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3538                 mqd->cp_hqd_active = 1;
3539
3540         return 0;
3541 }
3542
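/*
 * Editor's note (illustrative, not part of the driver): CP_HQD_EOP_CONTROL
 * encodes the EOP buffer size as 2^(EOP_SIZE+1) dwords, which is why the
 * code above programs order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1.  Worked
 * through for the 4096-byte HPD buffer used here:
 *
 *   4096 bytes / 4 = 1024 dwords
 *   order_base_2(1024) = 10
 *   EOP_SIZE field = 10 - 1 = 9
 *   hardware decodes 2^(9+1) = 1024 dwords, matching the buffer size.
 */
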
3543 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3544 {
3545         struct amdgpu_device *adev = ring->adev;
3546         struct v9_mqd *mqd = ring->mqd_ptr;
3547         int j;
3548
3549         /* disable wptr polling */
3550         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3551
3552         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3553                mqd->cp_hqd_eop_base_addr_lo);
3554         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3555                mqd->cp_hqd_eop_base_addr_hi);
3556
3557         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3558         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3559                mqd->cp_hqd_eop_control);
3560
3561         /* enable doorbell? */
3562         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3563                mqd->cp_hqd_pq_doorbell_control);
3564
3565         /* disable the queue if it's active */
3566         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3567                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3568                 for (j = 0; j < adev->usec_timeout; j++) {
3569                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3570                                 break;
3571                         udelay(1);
3572                 }
3573                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3574                        mqd->cp_hqd_dequeue_request);
3575                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3576                        mqd->cp_hqd_pq_rptr);
3577                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3578                        mqd->cp_hqd_pq_wptr_lo);
3579                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3580                        mqd->cp_hqd_pq_wptr_hi);
3581         }
3582
3583         /* set the pointer to the MQD */
3584         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3585                mqd->cp_mqd_base_addr_lo);
3586         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3587                mqd->cp_mqd_base_addr_hi);
3588
3589         /* set MQD vmid to 0 */
3590         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3591                mqd->cp_mqd_control);
3592
3593         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3594         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3595                mqd->cp_hqd_pq_base_lo);
3596         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3597                mqd->cp_hqd_pq_base_hi);
3598
3599         /* set up the HQD, this is similar to CP_RB0_CNTL */
3600         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3601                mqd->cp_hqd_pq_control);
3602
3603         /* set the wb address whether it's enabled or not */
3604         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3605                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3606         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3607                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3608
3609         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3610         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3611                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3612         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3613                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3614
3615         /* enable the doorbell if requested */
3616         if (ring->use_doorbell) {
3617                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3618                                         (adev->doorbell_index.kiq * 2) << 2);
3619                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3620                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3621         }
3622
3623         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3624                mqd->cp_hqd_pq_doorbell_control);
3625
3626         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3627         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3628                mqd->cp_hqd_pq_wptr_lo);
3629         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3630                mqd->cp_hqd_pq_wptr_hi);
3631
3632         /* set the vmid for the queue */
3633         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3634
3635         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3636                mqd->cp_hqd_persistent_state);
3637
3638         /* activate the queue */
3639         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3640                mqd->cp_hqd_active);
3641
3642         if (ring->use_doorbell)
3643                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3644
3645         return 0;
3646 }
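
/*
 * Editor's sketch (hypothetical helper, not an amdgpu API): the dequeue wait
 * above follows the driver's usual register-poll idiom.  Factored out under
 * the assumption that only RREG32()/udelay() are needed, it would look like:
 */
static inline int gfx_v9_0_example_poll_clear(struct amdgpu_device *adev,
					      u32 reg, u32 mask)
{
	int i;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!(RREG32(reg) & mask))
			return 0;	/* bit(s) cleared in time */
		udelay(1);
	}
	return -ETIMEDOUT;		/* caller decides how to recover */
}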
3647
3648 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3649 {
3650         struct amdgpu_device *adev = ring->adev;
3651         int j;
3652
3653         /* disable the queue if it's active */
3654         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3655
3656                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3657
3658                 for (j = 0; j < adev->usec_timeout; j++) {
3659                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3660                                 break;
3661                         udelay(1);
3662                 }
3663
3664                 if (j == adev->usec_timeout) {
3665                         DRM_DEBUG("KIQ dequeue request failed.\n");
3666
3667                         /* Manual disable if dequeue request times out */
3668                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3669                 }
3670
3671                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3672                       0);
3673         }
3674
3675         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3676         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3677         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3678         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3679         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3680         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3681         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3682         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3683
3684         return 0;
3685 }
3686
3687 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3688 {
3689         struct amdgpu_device *adev = ring->adev;
3690         struct v9_mqd *mqd = ring->mqd_ptr;
3691         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3692
3693         gfx_v9_0_kiq_setting(ring);
3694
3695         if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3696                 /* reset MQD to a clean status */
3697                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3698                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3699
3700                 /* reset ring buffer */
3701                 ring->wptr = 0;
3702                 amdgpu_ring_clear_ring(ring);
3703
3704                 mutex_lock(&adev->srbm_mutex);
3705                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3706                 gfx_v9_0_kiq_init_register(ring);
3707                 soc15_grbm_select(adev, 0, 0, 0, 0);
3708                 mutex_unlock(&adev->srbm_mutex);
3709         } else {
3710                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3711                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3712                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3713                 mutex_lock(&adev->srbm_mutex);
3714                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3715                 gfx_v9_0_mqd_init(ring);
3716                 gfx_v9_0_kiq_init_register(ring);
3717                 soc15_grbm_select(adev, 0, 0, 0, 0);
3718                 mutex_unlock(&adev->srbm_mutex);
3719
3720                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3721                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3722         }
3723
3724         return 0;
3725 }
3726
3727 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3728 {
3729         struct amdgpu_device *adev = ring->adev;
3730         struct v9_mqd *mqd = ring->mqd_ptr;
3731         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3732
3733         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3734                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3735                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3736                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3737                 mutex_lock(&adev->srbm_mutex);
3738                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3739                 gfx_v9_0_mqd_init(ring);
3740                 soc15_grbm_select(adev, 0, 0, 0, 0);
3741                 mutex_unlock(&adev->srbm_mutex);
3742
3743                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3744                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3745         } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3746                 /* reset MQD to a clean status */
3747                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3748                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3749
3750                 /* reset ring buffer */
3751                 ring->wptr = 0;
3752                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3753                 amdgpu_ring_clear_ring(ring);
3754         } else {
3755                 amdgpu_ring_clear_ring(ring);
3756         }
3757
3758         return 0;
3759 }
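
/*
 * Editor's note: gfx_v9_0_kcq_init_queue() above handles three cases:
 * first-time init (build a fresh MQD and back it up), GPU reset (restore the
 * MQD from the backup and clear the ring buffer), and resume (just clear the
 * ring, since the MQD contents were preserved across suspend).
 */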
3760
3761 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3762 {
3763         struct amdgpu_ring *ring;
3764         int r;
3765
3766         ring = &adev->gfx.kiq.ring;
3767
3768         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3769         if (unlikely(r != 0))
3770                 return r;
3771
3772         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3773         if (unlikely(r != 0))
3774                 goto out_unreserve;
3775
3776         gfx_v9_0_kiq_init_queue(ring);
3777         amdgpu_bo_kunmap(ring->mqd_obj);
3778         ring->mqd_ptr = NULL;
3779         ring->sched.ready = true;
     out_unreserve:
3780         amdgpu_bo_unreserve(ring->mqd_obj);
3781         return r;
3782 }
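
/*
 * Editor's sketch: the reserve/kmap -> use -> kunmap/unreserve bracket above
 * is the standard amdgpu pattern for CPU access to a buffer object.  A
 * minimal error-checked version (the zero-fill is just a placeholder
 * workload):
 */
static inline int gfx_v9_0_example_bo_cpu_access(struct amdgpu_bo *bo)
{
	void *cpu_ptr;
	int r;

	r = amdgpu_bo_reserve(bo, false);	/* take the BO reservation */
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_kmap(bo, &cpu_ptr);	/* map into kernel address space */
	if (!r) {
		memset(cpu_ptr, 0, amdgpu_bo_size(bo));
		amdgpu_bo_kunmap(bo);
	}

	amdgpu_bo_unreserve(bo);		/* always drop the reservation */
	return r;
}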
3783
3784 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3785 {
3786         struct amdgpu_ring *ring = NULL;
3787         int r = 0, i;
3788
3789         gfx_v9_0_cp_compute_enable(adev, true);
3790
3791         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3792                 ring = &adev->gfx.compute_ring[i];
3793
3794                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3795                 if (unlikely(r != 0))
3796                         goto done;
3797                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3798                 if (!r) {
3799                         r = gfx_v9_0_kcq_init_queue(ring);
3800                         amdgpu_bo_kunmap(ring->mqd_obj);
3801                         ring->mqd_ptr = NULL;
3802                 }
3803                 amdgpu_bo_unreserve(ring->mqd_obj);
3804                 if (r)
3805                         goto done;
3806         }
3807
3808         r = amdgpu_gfx_enable_kcq(adev);
3809 done:
3810         return r;
3811 }
3812
3813 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3814 {
3815         int r, i;
3816         struct amdgpu_ring *ring;
3817
3818         if (!(adev->flags & AMD_IS_APU))
3819                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3820
3821         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3822                 if (adev->asic_type != CHIP_ARCTURUS) {
3823                         /* legacy firmware loading */
3824                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3825                         if (r)
3826                                 return r;
3827                 }
3828
3829                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3830                 if (r)
3831                         return r;
3832         }
3833
3834         r = gfx_v9_0_kiq_resume(adev);
3835         if (r)
3836                 return r;
3837
3838         if (adev->asic_type != CHIP_ARCTURUS) {
3839                 r = gfx_v9_0_cp_gfx_resume(adev);
3840                 if (r)
3841                         return r;
3842         }
3843
3844         r = gfx_v9_0_kcq_resume(adev);
3845         if (r)
3846                 return r;
3847
3848         if (adev->asic_type != CHIP_ARCTURUS) {
3849                 ring = &adev->gfx.gfx_ring[0];
3850                 r = amdgpu_ring_test_helper(ring);
3851                 if (r)
3852                         return r;
3853         }
3854
3855         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3856                 ring = &adev->gfx.compute_ring[i];
3857                 amdgpu_ring_test_helper(ring);
3858         }
3859
3860         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3861
3862         return 0;
3863 }
3864
3865 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3866 {
3867         u32 tmp;
3868
3869         if (adev->asic_type != CHIP_ARCTURUS)
3870                 return;
3871
3872         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3873         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3874                                 adev->df.hash_status.hash_64k);
3875         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3876                                 adev->df.hash_status.hash_2m);
3877         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3878                                 adev->df.hash_status.hash_1g);
3879         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3880 }
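
/*
 * Editor's note: the RREG32_SOC15()/REG_SET_FIELD()/WREG32_SOC15() triple
 * above is the SOC15 read-modify-write idiom used throughout this file.
 * REG_SET_FIELD(tmp, REG, FIELD, v) expands (roughly) to the usual
 * mask-and-shift update, assuming the REG__FIELD_MASK/__SHIFT naming of the
 * sh_mask headers:
 *
 *   tmp = (tmp & ~REG__FIELD_MASK) |
 *         ((v << REG__FIELD__SHIFT) & REG__FIELD_MASK);
 */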
3881
3882 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3883 {
3884         if (adev->asic_type != CHIP_ARCTURUS)
3885                 gfx_v9_0_cp_gfx_enable(adev, enable);
3886         gfx_v9_0_cp_compute_enable(adev, enable);
3887 }
3888
3889 static int gfx_v9_0_hw_init(void *handle)
3890 {
3891         int r;
3892         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3893
3894         if (!amdgpu_sriov_vf(adev))
3895                 gfx_v9_0_init_golden_registers(adev);
3896
3897         gfx_v9_0_constants_init(adev);
3898
3899         gfx_v9_0_init_tcp_config(adev);
3900
3901         r = adev->gfx.rlc.funcs->resume(adev);
3902         if (r)
3903                 return r;
3904
3905         r = gfx_v9_0_cp_resume(adev);
3906         if (r)
3907                 return r;
3908
3909         return r;
3910 }
3911
3912 static int gfx_v9_0_hw_fini(void *handle)
3913 {
3914         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3915
3916         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3917         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3918         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3919
3920         /* DF freeze and kcq disable will fail if a RAS interrupt triggered */
3921         if (!amdgpu_ras_intr_triggered())
3922                 /* disable KCQ so the CPC stops touching now-invalid memory */
3923                 amdgpu_gfx_disable_kcq(adev);
3924
3925         if (amdgpu_sriov_vf(adev)) {
3926                 gfx_v9_0_cp_gfx_enable(adev, false);
3927                 /* must disable polling for SRIOV when hw is finished, otherwise
3928                  * the CPC engine may keep fetching a WB address that is already
3929                  * invalid after sw teardown, triggering a DMAR read error on the
3930                  * hypervisor side.
3931                  */
3932                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3933                 return 0;
3934         }
3935
3936         /* Use the deinitialize sequence from CAIL when unbinding the device
3937          * from the driver, otherwise the KIQ hangs when binding it back
3938          */
3939         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3940                 mutex_lock(&adev->srbm_mutex);
3941                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3942                                 adev->gfx.kiq.ring.pipe,
3943                                 adev->gfx.kiq.ring.queue, 0);
3944                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3945                 soc15_grbm_select(adev, 0, 0, 0, 0);
3946                 mutex_unlock(&adev->srbm_mutex);
3947         }
3948
3949         gfx_v9_0_cp_enable(adev, false);
3950         adev->gfx.rlc.funcs->stop(adev);
3951
3952         return 0;
3953 }
3954
3955 static int gfx_v9_0_suspend(void *handle)
3956 {
3957         return gfx_v9_0_hw_fini(handle);
3958 }
3959
3960 static int gfx_v9_0_resume(void *handle)
3961 {
3962         return gfx_v9_0_hw_init(handle);
3963 }
3964
3965 static bool gfx_v9_0_is_idle(void *handle)
3966 {
3967         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3968
3969         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3970                                 GRBM_STATUS, GUI_ACTIVE))
3971                 return false;
3972         else
3973                 return true;
3974 }
3975
3976 static int gfx_v9_0_wait_for_idle(void *handle)
3977 {
3978         unsigned i;
3979         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3980
3981         for (i = 0; i < adev->usec_timeout; i++) {
3982                 if (gfx_v9_0_is_idle(handle))
3983                         return 0;
3984                 udelay(1);
3985         }
3986         return -ETIMEDOUT;
3987 }
3988
3989 static int gfx_v9_0_soft_reset(void *handle)
3990 {
3991         u32 grbm_soft_reset = 0;
3992         u32 tmp;
3993         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3994
3995         /* GRBM_STATUS */
3996         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3997         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3998                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3999                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4000                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4001                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4002                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4003                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4004                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4005                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4006                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4007         }
4008
4009         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4010                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4011                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4012         }
4013
4014         /* GRBM_STATUS2 */
4015         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4016         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4017                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4018                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4019
4020
4021         if (grbm_soft_reset) {
4022                 /* stop the rlc */
4023                 adev->gfx.rlc.funcs->stop(adev);
4024
4025                 if (adev->asic_type != CHIP_ARCTURUS)
4026                         /* Disable GFX parsing/prefetching */
4027                         gfx_v9_0_cp_gfx_enable(adev, false);
4028
4029                 /* Disable MEC parsing/prefetching */
4030                 gfx_v9_0_cp_compute_enable(adev, false);
4031
4032                 /* grbm_soft_reset is already known to be non-zero here */
4033                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4034                 tmp |= grbm_soft_reset;
4035                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4036                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4037                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4038
4039                 udelay(50);
4040
4041                 tmp &= ~grbm_soft_reset;
4042                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4043                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4045
4046                 /* Wait a little for things to settle down */
4047                 udelay(50);
4048         }
4049         return 0;
4050 }
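
/*
 * Editor's note: in the reset pulse above, each WREG32_SOC15() is followed
 * by an RREG32_SOC15() of the same register.  The read-back forces the
 * posted write out to the hardware before the udelay(), so the assert and
 * de-assert edges are actually separated by the intended settle time.
 */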
4051
4052 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4053 {
4054         signed long r, cnt = 0;
4055         unsigned long flags;
4056         uint32_t seq, reg_val_offs = 0;
4057         uint64_t value = 0;
4058         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4059         struct amdgpu_ring *ring = &kiq->ring;
4060
4061         BUG_ON(!ring->funcs->emit_rreg);
4062
4063         spin_lock_irqsave(&kiq->ring_lock, flags);
4064         if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4065                 pr_err("critical bug! too many kiq readers\n");
4066                 goto failed_unlock;
4067         }
4068         amdgpu_ring_alloc(ring, 32);
4069         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4070         amdgpu_ring_write(ring, 9 |     /* src: register*/
4071                                 (5 << 8) |      /* dst: memory */
4072                                 (1 << 16) |     /* count sel */
4073                                 (1 << 20));     /* write confirm */
4074         amdgpu_ring_write(ring, 0);
4075         amdgpu_ring_write(ring, 0);
4076         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4077                                 reg_val_offs * 4));
4078         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4079                                 reg_val_offs * 4));
4080         r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4081         if (r)
4082                 goto failed_undo;
4083
4084         amdgpu_ring_commit(ring);
4085         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4086
4087         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4088
4089         /* don't wait any longer in the gpu reset case, since waiting here
4090          * may block the gpu_recover() routine forever; e.g. this virt_kiq_rreg
4091          * is triggered in TTM, and ttm_bo_lock_delayed_workqueue() will
4092          * never return if we keep waiting in virt_kiq_rreg, which causes
4093          * gpu_recover() to hang there.
4094          *
4095          * also don't wait any longer in IRQ context
4096          */
4097         if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
4098                 goto failed_kiq_read;
4099
4100         might_sleep();
4101         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4102                 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4103                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4104         }
4105
4106         if (cnt > MAX_KIQ_REG_TRY)
4107                 goto failed_kiq_read;
4108
4109         mb();
4110         value = (uint64_t)adev->wb.wb[reg_val_offs] |
4111                 (uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4112         amdgpu_device_wb_free(adev, reg_val_offs);
4113         return value;
4114
4115 failed_undo:
4116         amdgpu_ring_undo(ring);
4117 failed_unlock:
4118         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4119 failed_kiq_read:
4120         if (reg_val_offs)
4121                 amdgpu_device_wb_free(adev, reg_val_offs);
4122         pr_err("failed to read gpu clock\n");
4123         return ~0;
4124 }
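
/*
 * Editor's sketch (hypothetical helper): the KIQ read above lands the result
 * in two consecutive 32-bit writeback dwords, low word first, which are then
 * stitched into a 64-bit value.  Factored out:
 */
static inline u64 gfx_v9_0_example_wb_read64(struct amdgpu_device *adev,
					     u32 offs)
{
	/* low dword first, then the high dword shifted into place */
	return (u64)adev->wb.wb[offs] |
	       ((u64)adev->wb.wb[offs + 1] << 32);
}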
4125
4126 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4127 {
4128         uint64_t clock;
4129
4130         amdgpu_gfx_off_ctrl(adev, false);
4131         mutex_lock(&adev->gfx.gpu_clock_mutex);
4132         if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
4133                 clock = gfx_v9_0_kiq_read_clock(adev);
4134         } else {
4135                 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4136                 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4137                         ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4138         }
4139         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4140         amdgpu_gfx_off_ctrl(adev, true);
4141         return clock;
4142 }
4143
4144 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4145                                           uint32_t vmid,
4146                                           uint32_t gds_base, uint32_t gds_size,
4147                                           uint32_t gws_base, uint32_t gws_size,
4148                                           uint32_t oa_base, uint32_t oa_size)
4149 {
4150         struct amdgpu_device *adev = ring->adev;
4151
4152         /* GDS Base */
4153         gfx_v9_0_write_data_to_reg(ring, 0, false,
4154                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4155                                    gds_base);
4156
4157         /* GDS Size */
4158         gfx_v9_0_write_data_to_reg(ring, 0, false,
4159                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4160                                    gds_size);
4161
4162         /* GWS */
4163         gfx_v9_0_write_data_to_reg(ring, 0, false,
4164                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4165                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4166
4167         /* OA */
4168         gfx_v9_0_write_data_to_reg(ring, 0, false,
4169                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4170                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4171 }
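
/*
 * Editor's note: (1 << (oa_size + oa_base)) - (1 << oa_base) above builds a
 * contiguous mask of oa_size bits starting at bit oa_base; e.g. oa_base = 2,
 * oa_size = 3 gives 32 - 4 = 28 = 0b11100.  A hypothetical helper spelling
 * the same thing with GENMASK():
 */
static inline u32 gfx_v9_0_example_oa_mask(u32 oa_base, u32 oa_size)
{
	return GENMASK(oa_base + oa_size - 1, oa_base);
}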
4172
4173 static const u32 vgpr_init_compute_shader[] =
4174 {
4175         0xb07c0000, 0xbe8000ff,
4176         0x000000f8, 0xbf110800,
4177         0x7e000280, 0x7e020280,
4178         0x7e040280, 0x7e060280,
4179         0x7e080280, 0x7e0a0280,
4180         0x7e0c0280, 0x7e0e0280,
4181         0x80808800, 0xbe803200,
4182         0xbf84fff5, 0xbf9c0000,
4183         0xd28c0001, 0x0001007f,
4184         0xd28d0001, 0x0002027e,
4185         0x10020288, 0xb8810904,
4186         0xb7814000, 0xd1196a01,
4187         0x00000301, 0xbe800087,
4188         0xbefc00c1, 0xd89c4000,
4189         0x00020201, 0xd89cc080,
4190         0x00040401, 0x320202ff,
4191         0x00000800, 0x80808100,
4192         0xbf84fff8, 0x7e020280,
4193         0xbf810000, 0x00000000,
4194 };
4195
4196 static const u32 sgpr_init_compute_shader[] =
4197 {
4198         0xb07c0000, 0xbe8000ff,
4199         0x0000005f, 0xbee50080,
4200         0xbe812c65, 0xbe822c65,
4201         0xbe832c65, 0xbe842c65,
4202         0xbe852c65, 0xb77c0005,
4203         0x80808500, 0xbf84fff8,
4204         0xbe800080, 0xbf810000,
4205 };
4206
4207 static const u32 vgpr_init_compute_shader_arcturus[] = {
4208         0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4209         0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4210         0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4211         0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4212         0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4213         0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4214         0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4215         0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4216         0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4217         0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4218         0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4219         0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4220         0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4221         0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4222         0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4223         0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4224         0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4225         0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4226         0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4227         0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4228         0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4229         0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4230         0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4231         0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4232         0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4233         0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4234         0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4235         0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4236         0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4237         0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4238         0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4239         0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4240         0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4241         0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4242         0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4243         0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4244         0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4245         0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4246         0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4247         0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4248         0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4249         0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4250         0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4251         0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4252         0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4253         0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4254         0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4255         0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4256         0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4257         0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4258         0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4259         0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4260         0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4261         0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4262         0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4263         0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4264         0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4265         0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4266         0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4267         0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4268         0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4269         0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4270         0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4271         0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4272         0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4273         0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4274         0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4275         0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4276         0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4277         0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4278         0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4279         0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4280         0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4281         0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4282         0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4283         0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4284         0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4285         0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4286         0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4287         0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4288         0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4289         0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4290         0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4291         0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4292         0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4293         0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4294         0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4295         0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4296         0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4297         0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4298         0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4299         0xbf84fff8, 0xbf810000,
4300 };
4301
4302 /* When the register arrays below are changed, please update gpr_reg_size
4303    and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds() to
4304    cover all gfx9 ASICs */
4305 static const struct soc15_reg_entry vgpr_init_regs[] = {
4306    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4307    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4308    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4309    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4310    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4311    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4312    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4313    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4314    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4315    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4316    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4317    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4318    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4319    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4320 };
4321
4322 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4323    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4324    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4325    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4326    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4327    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4328    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4329    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4330    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4331    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4332    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4333    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4334    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4335    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4336    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4337 };
4338
4339 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4340    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4341    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4342    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4343    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4344    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4345    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4346    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4347    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4348    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4349    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4350    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4351    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4352    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4353    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4354 };
4355
4356 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4357    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4358    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4359    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4360    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4361    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4362    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4363    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4364    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4365    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4366    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4367    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4368    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4369    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4370    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4371 };
4372
4373 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4374    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4375    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4376    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4377    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4378    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4379    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4380    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4381    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4382    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4383    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4384    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4385    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4386    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4387    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4388    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4389    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4390    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4391    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4392    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4393    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4394    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4395    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4396    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4397    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4398    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4399    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4400    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4401    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4402    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4403    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4404    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4405    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4406    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4407 };
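
/*
 * Editor's sketch: the trailing { value, se_num, instance } fields in each
 * entry above drive nested per-SE / per-instance reads when the counters are
 * harvested.  Assuming the soc15_reg_entry layout and this file's existing
 * gfx_v9_0_select_se_sh() helper, a dump loop would look roughly like:
 */
#if 0	/* illustrative only */
	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
		for (se = 0; se < gfx_v9_0_edc_counter_regs[i].se_num; se++) {
			for (inst = 0; inst < gfx_v9_0_edc_counter_regs[i].instance; inst++) {
				gfx_v9_0_select_se_sh(adev, se, 0xffffffff, inst);
				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
			}
		}
	}
#endif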
4408
4409 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4410 {
4411         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4412         int i, r;
4413
4414         /* only supported when RAS is enabled */
4415         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4416                 return 0;
4417
4418         r = amdgpu_ring_alloc(ring, 7);
4419         if (r) {
4420                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4421                         ring->name, r);
4422                 return r;
4423         }
4424
4425         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4426         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4427
4428         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4429         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4430                                 PACKET3_DMA_DATA_DST_SEL(1) |
4431                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4432                                 PACKET3_DMA_DATA_ENGINE(0)));
4433         amdgpu_ring_write(ring, 0);
4434         amdgpu_ring_write(ring, 0);
4435         amdgpu_ring_write(ring, 0);
4436         amdgpu_ring_write(ring, 0);
4437         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4438                                 adev->gds.gds_size);
4439
4440         amdgpu_ring_commit(ring);
4441
4442         for (i = 0; i < adev->usec_timeout; i++) {
4443                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4444                         break;
4445                 udelay(1);
4446         }
4447
4448         if (i >= adev->usec_timeout)
4449                 r = -ETIMEDOUT;
4450
4451         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4452
4453         return r;
4454 }
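
/*
 * Editor's note: the wptr/rptr comparison loop above waits for the CP read
 * pointer to catch up with the write pointer, i.e. for everything submitted
 * to the ring (the DMA_DATA clear of GDS) to have been fetched, before the
 * GDS size is programmed back to 0.
 */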
4455
4456 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4457 {
4458         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4459         struct amdgpu_ib ib;
4460         struct dma_fence *f = NULL;
4461         int r, i;
4462         unsigned total_size, vgpr_offset, sgpr_offset;
4463         u64 gpu_addr;
4464
4465         int compute_dim_x = adev->gfx.config.max_shader_engines *
4466                                                 adev->gfx.config.max_cu_per_sh *
4467                                                 adev->gfx.config.max_sh_per_se;
4468         int sgpr_work_group_size = 5;
4469         int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4470         int vgpr_init_shader_size;
4471         const u32 *vgpr_init_shader_ptr;
4472         const struct soc15_reg_entry *vgpr_init_regs_ptr;
4473
4474         /* only supported when RAS is enabled */
4475         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4476                 return 0;
4477
4478         /* bail if the compute ring is not ready */
4479         if (!ring->sched.ready)
4480                 return 0;
4481
4482         if (adev->asic_type == CHIP_ARCTURUS) {
4483                 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4484                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4485                 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4486         } else {
4487                 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4488                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4489                 vgpr_init_regs_ptr = vgpr_init_regs;
4490         }
4491
4492         total_size =
4493                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4494         total_size +=
4495                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4496         total_size +=
4497                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4498         total_size = ALIGN(total_size, 256);
4499         vgpr_offset = total_size;
4500         total_size += ALIGN(vgpr_init_shader_size, 256);
4501         sgpr_offset = total_size;
4502         total_size += sizeof(sgpr_init_compute_shader);
4503
4504         /* allocate an indirect buffer to put the commands in */
4505         memset(&ib, 0, sizeof(ib));
4506         r = amdgpu_ib_get(adev, NULL, total_size,
4507                                         AMDGPU_IB_POOL_DIRECT, &ib);
4508         if (r) {
4509                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4510                 return r;
4511         }
4512
4513         /* load the compute shaders */
4514         for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4515                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4516
4517         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4518                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4519
4520         /* init the ib length to 0 */
4521         ib.length_dw = 0;
4522
4523         /* VGPR */
4524         /* write the register state for the compute dispatch */
4525         for (i = 0; i < gpr_reg_size; i++) {
4526                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4527                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4528                                                                 - PACKET3_SET_SH_REG_START;
4529                 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4530         }
4531         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4532         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4533         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4534         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4535                                                         - PACKET3_SET_SH_REG_START;
4536         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4537         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4538
4539         /* write dispatch packet */
4540         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4541         ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4542         ib.ptr[ib.length_dw++] = 1; /* y */
4543         ib.ptr[ib.length_dw++] = 1; /* z */
4544         ib.ptr[ib.length_dw++] =
4545                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4546
4547         /* write CS partial flush packet */
4548         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4549         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4550
4551         /* SGPR1 */
4552         /* write the register state for the compute dispatch */
4553         for (i = 0; i < gpr_reg_size; i++) {
4554                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4555                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4556                                                                 - PACKET3_SET_SH_REG_START;
4557                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4558         }
4559         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4560         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4561         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4562         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4563                                                         - PACKET3_SET_SH_REG_START;
4564         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4565         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4566
4567         /* write dispatch packet */
4568         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4569         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4570         ib.ptr[ib.length_dw++] = 1; /* y */
4571         ib.ptr[ib.length_dw++] = 1; /* z */
4572         ib.ptr[ib.length_dw++] =
4573                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4574
4575         /* write CS partial flush packet */
4576         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4577         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4578
4579         /* SGPR2 */
4580         /* write the register state for the compute dispatch */
4581         for (i = 0; i < gpr_reg_size; i++) {
4582                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4583                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4584                                                                 - PACKET3_SET_SH_REG_START;
4585                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4586         }
4587         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4588         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4589         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4590         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4591                                                         - PACKET3_SET_SH_REG_START;
4592         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4593         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4594
4595         /* write dispatch packet */
4596         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4597         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4598         ib.ptr[ib.length_dw++] = 1; /* y */
4599         ib.ptr[ib.length_dw++] = 1; /* z */
4600         ib.ptr[ib.length_dw++] =
4601                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4602
4603         /* write CS partial flush packet */
4604         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4605         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4606
4607         /* schedule the ib on the ring */
4608         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4609         if (r) {
4610                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4611                 goto fail;
4612         }
4613
4614         /* wait for the GPU to finish processing the IB */
4615         r = dma_fence_wait(f, false);
4616         if (r) {
4617                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4618                 goto fail;
4619         }
4620
4621 fail:
4622         amdgpu_ib_free(adev, &ib, NULL);
4623         dma_fence_put(f);
4624
4625         return r;
4626 }
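
/*
 * Editor's sketch (hypothetical helper, mirroring the open-coded pattern
 * above): each SET_SH_REG write in the IB is a three-dword packet - header,
 * register offset relative to PACKET3_SET_SH_REG_START, then the value.
 */
static inline void gfx_v9_0_example_ib_set_sh_reg(struct amdgpu_ib *ib,
						  u32 reg_offset, u32 value)
{
	ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
	ib->ptr[ib->length_dw++] = reg_offset - PACKET3_SET_SH_REG_START;
	ib->ptr[ib->length_dw++] = value;
}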
4627
4628 static int gfx_v9_0_early_init(void *handle)
4629 {
4630         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4631
4632         if (adev->asic_type == CHIP_ARCTURUS)
4633                 adev->gfx.num_gfx_rings = 0;
4634         else
4635                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4636         adev->gfx.num_compute_rings = amdgpu_num_kcq;
4637         gfx_v9_0_set_kiq_pm4_funcs(adev);
4638         gfx_v9_0_set_ring_funcs(adev);
4639         gfx_v9_0_set_irq_funcs(adev);
4640         gfx_v9_0_set_gds_init(adev);
4641         gfx_v9_0_set_rlc_funcs(adev);
4642
4643         return 0;
4644 }
4645
4646 static int gfx_v9_0_ecc_late_init(void *handle)
4647 {
4648         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4649         int r;
4650
4651         /*
4652          * Temporary workaround for an issue where CP firmware fails to
4653          * update the read pointer while CPDMA is writing a clearing
4654          * operation to GDS during the suspend/resume sequence on several
4655          * cards. So limit this operation to the cold-boot sequence.
4656          */
4657         if (!adev->in_suspend) {
4658                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4659                 if (r)
4660                         return r;
4661         }
4662
4663         /* requires IBs so do in late init after IB pool is initialized */
4664         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4665         if (r)
4666                 return r;
4667
4668         if (adev->gfx.funcs &&
4669             adev->gfx.funcs->reset_ras_error_count)
4670                 adev->gfx.funcs->reset_ras_error_count(adev);
4671
4672         r = amdgpu_gfx_ras_late_init(adev);
4673         if (r)
4674                 return r;
4675
4676         return 0;
4677 }
4678
4679 static int gfx_v9_0_late_init(void *handle)
4680 {
4681         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4682         int r;
4683
4684         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4685         if (r)
4686                 return r;
4687
4688         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4689         if (r)
4690                 return r;
4691
4692         r = gfx_v9_0_ecc_late_init(handle);
4693         if (r)
4694                 return r;
4695
4696         return 0;
4697 }
4698
4699 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4700 {
4701         uint32_t rlc_setting;
4702
4703         /* if the RLC F32 core is not enabled, report the RLC as disabled */
4704         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4705         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4706                 return false;
4707
4708         return true;
4709 }
4710
4711 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4712 {
4713         uint32_t data;
4714         unsigned i;
4715
4716         data = RLC_SAFE_MODE__CMD_MASK;
4717         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4718         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4719
4720         /* wait for RLC_SAFE_MODE */
4721         for (i = 0; i < adev->usec_timeout; i++) {
4722                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4723                         break;
4724                 udelay(1);
4725         }
4726 }
4727
4728 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4729 {
4730         uint32_t data;
4731
4732         data = RLC_SAFE_MODE__CMD_MASK;
4733         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4734 }
4735
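/*
 * Usage sketch (illustrative, not part of the driver): the two helpers
 * above are reached through the amdgpu_gfx_rlc_* wrappers, which check
 * is_rlc_enabled() first so the handshake is skipped while the RLC is
 * off.  A typical RLC-owned register update is therefore bracketed as:
 *
 *	amdgpu_gfx_rlc_enter_safe_mode(adev);
 *	... read-modify-write registers the RLC firmware also owns ...
 *	amdgpu_gfx_rlc_exit_safe_mode(adev);
 *
 * The enter path writes CMD|MESSAGE to mmRLC_SAFE_MODE and polls until
 * the RLC firmware acknowledges by clearing the CMD bit, which is the
 * handshake implemented in gfx_v9_0_set_safe_mode() above.
 */
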
4736 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4737                                                 bool enable)
4738 {
4739         amdgpu_gfx_rlc_enter_safe_mode(adev);
4740
4741         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4742                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4743                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4744                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4745         } else {
4746                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4747                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4748                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4749         }
4750
4751         amdgpu_gfx_rlc_exit_safe_mode(adev);
4752 }
4753
4754 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4755                                                 bool enable)
4756 {
4757         /* TODO: double check if we need to perform under safe mode */
4758         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4759
4760         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4761                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4762         else
4763                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4764
4765         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4766                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4767         else
4768                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4769
4770         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4771 }
4772
4773 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4774                                                       bool enable)
4775 {
4776         uint32_t data, def;
4777
4778         amdgpu_gfx_rlc_enter_safe_mode(adev);
4779
4780         /* It is disabled by HW by default */
4781         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4782                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4783                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4784
4785                 if (adev->asic_type != CHIP_VEGA12)
4786                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4787
4788                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4789                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4790                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4791
4792                 /* only for Vega10 & Raven1 */
4793                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4794
4795                 if (def != data)
4796                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4797
4798                 /* MGLS is a global flag to control all MGLS in GFX */
4799                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4800                         /* 2 - RLC memory Light sleep */
4801                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4802                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4803                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4804                                 if (def != data)
4805                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4806                         }
4807                         /* 3 - CP memory Light sleep */
4808                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4809                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4810                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4811                                 if (def != data)
4812                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4813                         }
4814                 }
4815         } else {
4816                 /* 1 - MGCG_OVERRIDE */
4817                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4818
4819                 if (adev->asic_type != CHIP_VEGA12)
4820                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4821
4822                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4823                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4824                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4825                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4826
4827                 if (def != data)
4828                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4829
4830                 /* 2 - disable MGLS in RLC */
4831                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4832                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4833                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4834                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4835                 }
4836
4837                 /* 3 - disable MGLS in CP */
4838                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4839                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4840                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4841                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4842                 }
4843         }
4844
4845         amdgpu_gfx_rlc_exit_safe_mode(adev);
4846 }
4847
4848 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4849                                            bool enable)
4850 {
4851         uint32_t data, def;
4852
4853         if (adev->asic_type == CHIP_ARCTURUS)
4854                 return;
4855
4856         amdgpu_gfx_rlc_enter_safe_mode(adev);
4857
4858         /* Enable 3D CGCG/CGLS */
4859         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4860                 /* write cmd to clear the cgcg/cgls override */
4861                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4862                 /* unset CGCG override */
4863                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4864                 /* update CGCG and CGLS override bits */
4865                 if (def != data)
4866                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4867
4868                 /* enable the 3D CGCG FSM (0x0000363f) */
4869                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4870
4871                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4872                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4873                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4874                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4875                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4876                 if (def != data)
4877                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4878
4879                 /* set IDLE_POLL_COUNT(0x00900100) */
4880                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4881                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4882                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4883                 if (def != data)
4884                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4885         } else {
4886                 /* Disable CGCG/CGLS */
4887                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4888                 /* disable cgcg; cgls must be disabled as well */
4889                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4890                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4891                 /* disable cgcg and cgls in FSM */
4892                 if (def != data)
4893                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4894         }
4895
4896         amdgpu_gfx_rlc_exit_safe_mode(adev);
4897 }
4898
4899 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4900                                                       bool enable)
4901 {
4902         uint32_t def, data;
4903
4904         amdgpu_gfx_rlc_enter_safe_mode(adev);
4905
4906         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4907                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4908                 /* unset CGCG override */
4909                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4910                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4911                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4912                 else
4913                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4914                 /* update CGCG and CGLS override bits */
4915                 if (def != data)
4916                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4917
4918                 /* enable the CGCG FSM (0x0000363F) */
4919                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4920
4921                 if (adev->asic_type == CHIP_ARCTURUS)
4922                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4923                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4924                 else
4925                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4926                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4927                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4928                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4929                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4930                 if (def != data)
4931                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4932
4933                 /* set IDLE_POLL_COUNT(0x00900100) */
4934                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4935                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4936                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4937                 if (def != data)
4938                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4939         } else {
4940                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4941                 /* reset CGCG/CGLS bits */
4942                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4943                 /* disable cgcg and cgls in FSM */
4944                 if (def != data)
4945                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4946         }
4947
4948         amdgpu_gfx_rlc_exit_safe_mode(adev);
4949 }
4950
4951 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4952                                             bool enable)
4953 {
4954         if (enable) {
4955                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4956                  * ===  MGCG + MGLS ===
4957                  */
4958                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4959                 /* ===  CGCG /CGLS for GFX 3D Only === */
4960                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4961                 /* ===  CGCG + CGLS === */
4962                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4963         } else {
4964                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4965                  * ===  CGCG + CGLS ===
4966                  */
4967                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4968                 /* ===  CGCG /CGLS for GFX 3D Only === */
4969                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4970                 /* ===  MGCG + MGLS === */
4971                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4972         }
4973         return 0;
4974 }
4975
4976 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
4977 {
4978         u32 reg, data;
4979
4980         reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
4981         if (amdgpu_sriov_is_pp_one_vf(adev))
4982                 data = RREG32_NO_KIQ(reg);
4983         else
4984                 data = RREG32(reg);
4985
4986         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4987         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4988
4989         if (amdgpu_sriov_is_pp_one_vf(adev))
4990                 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
4991         else
4992                 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
4993 }
4994
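/*
 * Side note (illustrative): the open-coded mask/shift in
 * gfx_v9_0_update_spm_vmid() is the expanded form of the REG_SET_FIELD()
 * helper used elsewhere in this file; the same update could be written as
 *
 *	data = REG_SET_FIELD(data, RLC_SPM_MC_CNTL, RLC_SPM_VMID, vmid);
 */
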
4995 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
4996                                         uint32_t offset,
4997                                         struct soc15_reg_rlcg *entries, int arr_size)
4998 {
4999         int i;
5000         uint32_t reg;
5001
5002         if (!entries)
5003                 return false;
5004
5005         for (i = 0; i < arr_size; i++) {
5006                 const struct soc15_reg_rlcg *entry;
5007
5008                 entry = &entries[i];
5009                 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5010                 if (offset == reg)
5011                         return true;
5012         }
5013
5014         return false;
5015 }
5016
5017 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5018 {
5019         return gfx_v9_0_check_rlcg_range(adev, offset,
5020                                         (void *)rlcg_access_gc_9_0,
5021                                         ARRAY_SIZE(rlcg_access_gc_9_0));
5022 }
5023
5024 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5025         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5026         .set_safe_mode = gfx_v9_0_set_safe_mode,
5027         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5028         .init = gfx_v9_0_rlc_init,
5029         .get_csb_size = gfx_v9_0_get_csb_size,
5030         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5031         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5032         .resume = gfx_v9_0_rlc_resume,
5033         .stop = gfx_v9_0_rlc_stop,
5034         .reset = gfx_v9_0_rlc_reset,
5035         .start = gfx_v9_0_rlc_start,
5036         .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5037         .rlcg_wreg = gfx_v9_0_rlcg_wreg,
5038         .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5039 };
5040
5041 static int gfx_v9_0_set_powergating_state(void *handle,
5042                                           enum amd_powergating_state state)
5043 {
5044         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5045         bool enable = (state == AMD_PG_STATE_GATE);
5046
5047         switch (adev->asic_type) {
5048         case CHIP_RAVEN:
5049         case CHIP_RENOIR:
5050                 if (!enable)
5051                         amdgpu_gfx_off_ctrl(adev, false);
5052
5053                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5054                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5055                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5056                 } else {
5057                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5058                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5059                 }
5060
5061                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5062                         gfx_v9_0_enable_cp_power_gating(adev, true);
5063                 else
5064                         gfx_v9_0_enable_cp_power_gating(adev, false);
5065
5066                 /* update gfx cgpg state */
5067                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5068
5069                 /* update mgcg state */
5070                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5071
5072                 if (enable)
5073                         amdgpu_gfx_off_ctrl(adev, true);
5074                 break;
5075         case CHIP_VEGA12:
5076                 amdgpu_gfx_off_ctrl(adev, enable);
5077                 break;
5078         default:
5079                 break;
5080         }
5081
5082         return 0;
5083 }
5084
5085 static int gfx_v9_0_set_clockgating_state(void *handle,
5086                                           enum amd_clockgating_state state)
5087 {
5088         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5089
5090         if (amdgpu_sriov_vf(adev))
5091                 return 0;
5092
5093         switch (adev->asic_type) {
5094         case CHIP_VEGA10:
5095         case CHIP_VEGA12:
5096         case CHIP_VEGA20:
5097         case CHIP_RAVEN:
5098         case CHIP_ARCTURUS:
5099         case CHIP_RENOIR:
5100                 gfx_v9_0_update_gfx_clock_gating(adev,
5101                                                  state == AMD_CG_STATE_GATE);
5102                 break;
5103         default:
5104                 break;
5105         }
5106         return 0;
5107 }
5108
5109 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5110 {
5111         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5112         int data;
5113
5114         if (amdgpu_sriov_vf(adev))
5115                 *flags = 0;
5116
5117         /* AMD_CG_SUPPORT_GFX_MGCG */
5118         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5119         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5120                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5121
5122         /* AMD_CG_SUPPORT_GFX_CGCG */
5123         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5124         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5125                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5126
5127         /* AMD_CG_SUPPORT_GFX_CGLS */
5128         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5129                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5130
5131         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5132         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5133         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5134                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5135
5136         /* AMD_CG_SUPPORT_GFX_CP_LS */
5137         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5138         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5139                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5140
5141         if (adev->asic_type != CHIP_ARCTURUS) {
5142                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5143                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5144                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5145                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5146
5147                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5148                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5149                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5150         }
5151 }
5152
5153 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5154 {
5155         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
5156 }
5157
5158 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5159 {
5160         struct amdgpu_device *adev = ring->adev;
5161         u64 wptr;
5162
5163         /* XXX check if swapping is necessary on BE */
5164         if (ring->use_doorbell) {
5165                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5166         } else {
5167                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5168                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5169         }
5170
5171         return wptr;
5172 }
5173
5174 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5175 {
5176         struct amdgpu_device *adev = ring->adev;
5177
5178         if (ring->use_doorbell) {
5179                 /* XXX check if swapping is necessary on BE */
5180                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5181                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5182         } else {
5183                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5184                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5185         }
5186 }
5187
5188 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5189 {
5190         struct amdgpu_device *adev = ring->adev;
5191         u32 ref_and_mask, reg_mem_engine;
5192         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5193
5194         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5195                 switch (ring->me) {
5196                 case 1:
5197                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5198                         break;
5199                 case 2:
5200                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5201                         break;
5202                 default:
5203                         return;
5204                 }
5205                 reg_mem_engine = 0;
5206         } else {
5207                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5208                 reg_mem_engine = 1; /* pfp */
5209         }
5210
5211         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5212                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5213                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5214                               ref_and_mask, ref_and_mask, 0x20);
5215 }
5216
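/*
 * Illustrative decode of the ref_and_mask selection above, assuming the
 * NBIO HDP flush req/done registers carry one consecutive bit per CP
 * client: a compute ring on me 1, pipe 2 waits on ref_and_mask_cp2 << 2,
 * i.e. the done bit two positions above CP2's, while gfx rings always
 * wait on the CP0 bit using the PFP engine.  The trailing 0x20 is the
 * poll interval handed to gfx_v9_0_wait_reg_mem().
 */
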
5217 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5218                                         struct amdgpu_job *job,
5219                                         struct amdgpu_ib *ib,
5220                                         uint32_t flags)
5221 {
5222         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5223         u32 header, control = 0;
5224
5225         if (ib->flags & AMDGPU_IB_FLAG_CE)
5226                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5227         else
5228                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5229
5230         control |= ib->length_dw | (vmid << 24);
5231
5232         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5233                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5234
5235                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5236                         gfx_v9_0_ring_emit_de_meta(ring);
5237         }
5238
5239         amdgpu_ring_write(ring, header);
5240         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5241         amdgpu_ring_write(ring,
5242 #ifdef __BIG_ENDIAN
5243                 (2 << 0) |
5244 #endif
5245                 lower_32_bits(ib->gpu_addr));
5246         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5247         amdgpu_ring_write(ring, control);
5248 }
5249
5250 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5251                                           struct amdgpu_job *job,
5252                                           struct amdgpu_ib *ib,
5253                                           uint32_t flags)
5254 {
5255         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5256         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5257
5258         /* Currently there is a high likelihood of a wave ID mismatch
5259          * between ME and GDS, leading to a HW deadlock, because ME generates
5260          * different wave IDs than the GDS expects. This situation happens
5261          * randomly when at least 5 compute pipes use GDS ordered append.
5262          * The wave IDs generated by ME are also wrong after suspend/resume.
5263          * Those are probably bugs somewhere else in the kernel driver.
5264          *
5265          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5266          * GDS to 0 for this ring (me/pipe).
5267          */
5268         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5269                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5270                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5271                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5272         }
5273
5274         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5275         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5276         amdgpu_ring_write(ring,
5277 #ifdef __BIG_ENDIAN
5278                                 (2 << 0) |
5279 #endif
5280                                 lower_32_bits(ib->gpu_addr));
5281         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5282         amdgpu_ring_write(ring, control);
5283 }
5284
5285 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5286                                      u64 seq, unsigned flags)
5287 {
5288         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5289         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5290         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5291
5292         /* RELEASE_MEM - flush caches, send int */
5293         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5294         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5295                                                EOP_TC_NC_ACTION_EN) :
5296                                               (EOP_TCL1_ACTION_EN |
5297                                                EOP_TC_ACTION_EN |
5298                                                EOP_TC_WB_ACTION_EN |
5299                                                EOP_TC_MD_ACTION_EN)) |
5300                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5301                                  EVENT_INDEX(5)));
5302         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5303
5304         /*
5305          * The address must be QWORD aligned for a 64-bit write, and DWORD
5306          * aligned when only the low 32 bits are sent (data high discarded).
5307          */
5308         if (write64bit)
5309                 BUG_ON(addr & 0x7);
5310         else
5311                 BUG_ON(addr & 0x3);
5312         amdgpu_ring_write(ring, lower_32_bits(addr));
5313         amdgpu_ring_write(ring, upper_32_bits(addr));
5314         amdgpu_ring_write(ring, lower_32_bits(seq));
5315         amdgpu_ring_write(ring, upper_32_bits(seq));
5316         amdgpu_ring_write(ring, 0);
5317 }
5318
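/*
 * Worked example for the alignment rule above (illustrative numbers):
 * with AMDGPU_FENCE_FLAG_64BIT set the CP writes the full 64-bit seq
 * (DATA_SEL(2)), so addr must be QWORD aligned - 0x1008 passes
 * (0x1008 & 0x7 == 0) while 0x100c would trip the BUG_ON.  Without the
 * flag only the low 32 bits are written (DATA_SEL(1)) and DWORD
 * alignment (addr & 0x3 == 0) is sufficient.
 */
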
5319 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5320 {
5321         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5322         uint32_t seq = ring->fence_drv.sync_seq;
5323         uint64_t addr = ring->fence_drv.gpu_addr;
5324
5325         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5326                               lower_32_bits(addr), upper_32_bits(addr),
5327                               seq, 0xffffffff, 4);
5328 }
5329
5330 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5331                                         unsigned vmid, uint64_t pd_addr)
5332 {
5333         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5334
5335         /* compute doesn't have PFP */
5336         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5337                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5338                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5339                 amdgpu_ring_write(ring, 0x0);
5340         }
5341 }
5342
5343 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5344 {
5345         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5346 }
5347
5348 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5349 {
5350         u64 wptr;
5351
5352         /* XXX check if swapping is necessary on BE */
5353         if (ring->use_doorbell)
5354                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5355         else
5356                 BUG();
5357         return wptr;
5358 }
5359
5360 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5361 {
5362         struct amdgpu_device *adev = ring->adev;
5363
5364         /* XXX check if swapping is necessary on BE */
5365         if (ring->use_doorbell) {
5366                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5367                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5368         } else {
5369                 BUG(); /* only DOORBELL method supported on gfx9 now */
5370         }
5371 }
5372
5373 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5374                                          u64 seq, unsigned int flags)
5375 {
5376         struct amdgpu_device *adev = ring->adev;
5377
5378         /* we only allocate 32bit for each seq wb address */
5379         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5380
5381         /* write fence seq to the "addr" */
5382         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5383         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5384                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5385         amdgpu_ring_write(ring, lower_32_bits(addr));
5386         amdgpu_ring_write(ring, upper_32_bits(addr));
5387         amdgpu_ring_write(ring, lower_32_bits(seq));
5388
5389         if (flags & AMDGPU_FENCE_FLAG_INT) {
5390                 /* set register to trigger INT */
5391                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5392                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5393                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5394                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5395                 amdgpu_ring_write(ring, 0);
5396                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5397         }
5398 }
5399
5400 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5401 {
5402         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5403         amdgpu_ring_write(ring, 0);
5404 }
5405
5406 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5407 {
5408         struct v9_ce_ib_state ce_payload = {0};
5409         uint64_t csa_addr;
5410         int cnt;
5411
5412         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5413         csa_addr = amdgpu_csa_vaddr(ring->adev);
5414
5415         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5416         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5417                                  WRITE_DATA_DST_SEL(8) |
5418                                  WR_CONFIRM) |
5419                                  WRITE_DATA_CACHE_POLICY(0));
5420         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5421         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5422         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5423 }
5424
5425 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5426 {
5427         struct v9_de_ib_state de_payload = {0};
5428         uint64_t csa_addr, gds_addr;
5429         int cnt;
5430
5431         csa_addr = amdgpu_csa_vaddr(ring->adev);
5432         gds_addr = csa_addr + 4096;
5433         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5434         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5435
5436         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5437         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5438         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5439                                  WRITE_DATA_DST_SEL(8) |
5440                                  WR_CONFIRM) |
5441                                  WRITE_DATA_CACHE_POLICY(0));
5442         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5443         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5444         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5445 }
5446
5447 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5448                                    bool secure)
5449 {
5450         uint32_t v = secure ? FRAME_TMZ : 0;
5451
5452         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5453         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5454 }
5455
5456 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5457 {
5458         uint32_t dw2 = 0;
5459
5460         if (amdgpu_sriov_vf(ring->adev))
5461                 gfx_v9_0_ring_emit_ce_meta(ring);
5462
5463         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5464         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5465                 /* set load_global_config & load_global_uconfig */
5466                 dw2 |= 0x8001;
5467                 /* set load_cs_sh_regs */
5468                 dw2 |= 0x01000000;
5469                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5470                 dw2 |= 0x10002;
5471
5472                 /* set load_ce_ram if preamble presented */
5473                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5474                         dw2 |= 0x10000000;
5475         } else {
5476                 /* still load_ce_ram if a preamble is presented for the
5477                  * first time, even though no context switch happens.
5478                  */
5479                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5480                         dw2 |= 0x10000000;
5481         }
5482
5483         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5484         amdgpu_ring_write(ring, dw2);
5485         amdgpu_ring_write(ring, 0);
5486 }
5487
5488 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5489 {
5490         unsigned ret;
5491         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5492         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5493         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5494         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5495         ret = ring->wptr & ring->buf_mask;
5496         amdgpu_ring_write(ring, 0x55aa55aa); /* dummy value, patched later with the real DW count */
5497         return ret;
5498 }
5499
5500 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5501 {
5502         unsigned cur;
5503         BUG_ON(offset > ring->buf_mask);
5504         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5505
5506         cur = (ring->wptr & ring->buf_mask) - 1;
5507         if (likely(cur > offset))
5508                 ring->ring[offset] = cur - offset;
5509         else
5510                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
5511 }
5512
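/*
 * Worked example for the wrap handling above (illustrative numbers):
 * on a 16384-DW ring (buf_mask 0x3fff), a COND_EXEC patched at
 * offset 16380 while the last written DW is index 8 (cur == 8) takes
 * the else branch: (16384 - 16380) + 8 = 12 DWs to skip, correctly
 * counting across the ring wrap; the common non-wrapping case is
 * simply cur - offset.
 */
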
5513 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5514                                     uint32_t reg_val_offs)
5515 {
5516         struct amdgpu_device *adev = ring->adev;
5517
5518         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5519         amdgpu_ring_write(ring, 0 |     /* src: register*/
5520                                 (5 << 8) |      /* dst: memory */
5521                                 (1 << 20));     /* write confirm */
5522         amdgpu_ring_write(ring, reg);
5523         amdgpu_ring_write(ring, 0);
5524         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5525                                 reg_val_offs * 4));
5526         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5527                                 reg_val_offs * 4));
5528 }
5529
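/*
 * Note: nothing is returned here; the COPY_DATA packet above asks the CP
 * to store the register value into the writeback buffer, and the caller
 * (e.g. the KIQ register-read path) waits on the ring's fence and then
 * reads the result from adev->wb.wb[reg_val_offs].
 */
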
5530 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5531                                     uint32_t val)
5532 {
5533         uint32_t cmd = 0;
5534
5535         switch (ring->funcs->type) {
5536         case AMDGPU_RING_TYPE_GFX:
5537                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5538                 break;
5539         case AMDGPU_RING_TYPE_KIQ:
5540                 cmd = (1 << 16); /* no inc addr */
5541                 break;
5542         default:
5543                 cmd = WR_CONFIRM;
5544                 break;
5545         }
5546         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5547         amdgpu_ring_write(ring, cmd);
5548         amdgpu_ring_write(ring, reg);
5549         amdgpu_ring_write(ring, 0);
5550         amdgpu_ring_write(ring, val);
5551 }
5552
5553 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5554                                         uint32_t val, uint32_t mask)
5555 {
5556         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5557 }
5558
5559 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5560                                                   uint32_t reg0, uint32_t reg1,
5561                                                   uint32_t ref, uint32_t mask)
5562 {
5563         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5564         struct amdgpu_device *adev = ring->adev;
5565         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5566                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5567
5568         if (fw_version_ok)
5569                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5570                                       ref, mask, 0x20);
5571         else
5572                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5573                                                            ref, mask);
5574 }
5575
5576 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5577 {
5578         struct amdgpu_device *adev = ring->adev;
5579         uint32_t value = 0;
5580
5581         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5582         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5583         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5584         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5585         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5586 }
5587
5588 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5589                                                  enum amdgpu_interrupt_state state)
5590 {
5591         switch (state) {
5592         case AMDGPU_IRQ_STATE_DISABLE:
5593         case AMDGPU_IRQ_STATE_ENABLE:
5594                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5595                                TIME_STAMP_INT_ENABLE,
5596                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5597                 break;
5598         default:
5599                 break;
5600         }
5601 }
5602
5603 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5604                                                      int me, int pipe,
5605                                                      enum amdgpu_interrupt_state state)
5606 {
5607         u32 mec_int_cntl, mec_int_cntl_reg;
5608
5609         /*
5610          * amdgpu controls only the first MEC. That's why this function only
5611          * handles the setting of interrupts for this specific MEC. All other
5612          * pipes' interrupts are set by amdkfd.
5613          */
5614
5615         if (me == 1) {
5616                 switch (pipe) {
5617                 case 0:
5618                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5619                         break;
5620                 case 1:
5621                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5622                         break;
5623                 case 2:
5624                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5625                         break;
5626                 case 3:
5627                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5628                         break;
5629                 default:
5630                         DRM_DEBUG("invalid pipe %d\n", pipe);
5631                         return;
5632                 }
5633         } else {
5634                 DRM_DEBUG("invalid me %d\n", me);
5635                 return;
5636         }
5637
5638         switch (state) {
5639         case AMDGPU_IRQ_STATE_DISABLE:
5640                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5641                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5642                                              TIME_STAMP_INT_ENABLE, 0);
5643                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5644                 break;
5645         case AMDGPU_IRQ_STATE_ENABLE:
5646                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5647                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5648                                              TIME_STAMP_INT_ENABLE, 1);
5649                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5650                 break;
5651         default:
5652                 break;
5653         }
5654 }
5655
5656 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5657                                              struct amdgpu_irq_src *source,
5658                                              unsigned type,
5659                                              enum amdgpu_interrupt_state state)
5660 {
5661         switch (state) {
5662         case AMDGPU_IRQ_STATE_DISABLE:
5663         case AMDGPU_IRQ_STATE_ENABLE:
5664                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5665                                PRIV_REG_INT_ENABLE,
5666                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5667                 break;
5668         default:
5669                 break;
5670         }
5671
5672         return 0;
5673 }
5674
5675 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5676                                               struct amdgpu_irq_src *source,
5677                                               unsigned type,
5678                                               enum amdgpu_interrupt_state state)
5679 {
5680         switch (state) {
5681         case AMDGPU_IRQ_STATE_DISABLE:
5682         case AMDGPU_IRQ_STATE_ENABLE:
5683                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5684                                PRIV_INSTR_INT_ENABLE,
5685                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
5686         default:
5687                 break;
5688         }
5689
5690         return 0;
5691 }
5692
5693 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5694         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5695                         CP_ECC_ERROR_INT_ENABLE, 1)
5696
5697 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5698         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5699                         CP_ECC_ERROR_INT_ENABLE, 0)
5700
5701 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5702                                               struct amdgpu_irq_src *source,
5703                                               unsigned type,
5704                                               enum amdgpu_interrupt_state state)
5705 {
5706         switch (state) {
5707         case AMDGPU_IRQ_STATE_DISABLE:
5708                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5709                                 CP_ECC_ERROR_INT_ENABLE, 0);
5710                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5711                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5712                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5713                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5714                 break;
5715
5716         case AMDGPU_IRQ_STATE_ENABLE:
5717                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5718                                 CP_ECC_ERROR_INT_ENABLE, 1);
5719                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5720                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5721                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5722                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5723                 break;
5724         default:
5725                 break;
5726         }
5727
5728         return 0;
5729 }
5730
5732 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5733                                             struct amdgpu_irq_src *src,
5734                                             unsigned type,
5735                                             enum amdgpu_interrupt_state state)
5736 {
5737         switch (type) {
5738         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5739                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5740                 break;
5741         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5742                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5743                 break;
5744         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5745                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5746                 break;
5747         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5748                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5749                 break;
5750         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5751                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5752                 break;
5753         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5754                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5755                 break;
5756         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5757                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5758                 break;
5759         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5760                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5761                 break;
5762         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5763                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5764                 break;
5765         default:
5766                 break;
5767         }
5768         return 0;
5769 }
5770
5771 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5772                             struct amdgpu_irq_src *source,
5773                             struct amdgpu_iv_entry *entry)
5774 {
5775         int i;
5776         u8 me_id, pipe_id, queue_id;
5777         struct amdgpu_ring *ring;
5778
5779         DRM_DEBUG("IH: CP EOP\n");
5780         me_id = (entry->ring_id & 0x0c) >> 2;
5781         pipe_id = (entry->ring_id & 0x03) >> 0;
5782         queue_id = (entry->ring_id & 0x70) >> 4;
5783
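        /*
         * Example decode (illustrative): ring_id 0x26 yields
         * me_id = (0x26 & 0x0c) >> 2 = 1, pipe_id = 0x26 & 0x03 = 2 and
         * queue_id = (0x26 & 0x70) >> 4 = 2, i.e. MEC1, pipe 2, queue 2.
         */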
5784         switch (me_id) {
5785         case 0:
5786                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5787                 break;
5788         case 1:
5789         case 2:
5790                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5791                         ring = &adev->gfx.compute_ring[i];
5792                         /* Per-queue interrupt is supported for MEC starting from VI.
5793                          * The interrupt can only be enabled/disabled per pipe, not per queue.
5794                          */
5795                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5796                                 amdgpu_fence_process(ring);
5797                 }
5798                 break;
5799         }
5800         return 0;
5801 }
5802
5803 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5804                            struct amdgpu_iv_entry *entry)
5805 {
5806         u8 me_id, pipe_id, queue_id;
5807         struct amdgpu_ring *ring;
5808         int i;
5809
5810         me_id = (entry->ring_id & 0x0c) >> 2;
5811         pipe_id = (entry->ring_id & 0x03) >> 0;
5812         queue_id = (entry->ring_id & 0x70) >> 4;
5813
5814         switch (me_id) {
5815         case 0:
5816                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5817                 break;
5818         case 1:
5819         case 2:
5820                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5821                         ring = &adev->gfx.compute_ring[i];
5822                         if (ring->me == me_id && ring->pipe == pipe_id &&
5823                             ring->queue == queue_id)
5824                                 drm_sched_fault(&ring->sched);
5825                 }
5826                 break;
5827         }
5828 }
5829
5830 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5831                                  struct amdgpu_irq_src *source,
5832                                  struct amdgpu_iv_entry *entry)
5833 {
5834         DRM_ERROR("Illegal register access in command stream\n");
5835         gfx_v9_0_fault(adev, entry);
5836         return 0;
5837 }
5838
5839 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5840                                   struct amdgpu_irq_src *source,
5841                                   struct amdgpu_iv_entry *entry)
5842 {
5843         DRM_ERROR("Illegal instruction in command stream\n");
5844         gfx_v9_0_fault(adev, entry);
5845         return 0;
5846 }
5847
5849 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5850         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5851           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5852           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5853         },
5854         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5855           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5856           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5857         },
5858         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5859           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5860           0, 0
5861         },
5862         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5863           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5864           0, 0
5865         },
5866         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5867           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5868           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5869         },
5870         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5871           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5872           0, 0
5873         },
5874         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5875           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5876           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5877         },
5878         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5879           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
          SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
        },
        { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
          SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
          0, 0
        },
        { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
          SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
          0, 0
        },
        { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
          SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
          0, 0
        },
        { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
          SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
          SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
        },
        { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
          SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
          0, 0
        },
        { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
          SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
          SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
        },
        { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
          SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
          SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
          SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
        },
        { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
          SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
          SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
          0, 0
        },
        { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
          SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
          SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
          SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
        },
        { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
          SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
          SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
          SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
        },
        { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
          SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
          SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
          SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
        },
        { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
          SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
          SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
          SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
        },
        { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
          SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
          0, 0
        },
        { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
          SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
          SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
        },
        { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
          SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
          0, 0
        },
        { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
          SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
          0, 0
        },
        { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
          SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
          0, 0
        },
        { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
          SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
          0, 0
        },
        { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
          SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
          0, 0
        },
        { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
          SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
          0, 0
        },
        { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
          SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
          SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
        },
        { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
          SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
          SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
        },
        { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
          SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
          SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
        },
        { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
          SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
          SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
        },
        { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
          SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
          SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
        },
        { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
          SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
          0, 0
        },
        { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
          SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
          0, 0
        },
        { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
          SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
          0, 0
        },
        { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
          SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
          0, 0
        },
        { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
          SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
          0, 0
        },
        { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
          SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
          0, 0
        },
        { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
          SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
          0, 0
        },
        { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
          SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
          0, 0
        },
        { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
          SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
          0, 0
        },
        { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
          SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
          0, 0
        },
        { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
          SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
          0, 0
        },
        { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
          SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
          0, 0
        },
        { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
          SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
          0, 0
        },
        { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
          SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
          0, 0
        },
        { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
          SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
          SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
        },
        { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
          SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
          SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
        },
        { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
          SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
          0, 0
        },
        { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
          SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
          0, 0
        },
        { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
          SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
          0, 0
        },
        { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
          SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
          SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
        },
        { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
          SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
          SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
        },
        { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
          SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
          SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
        },
        { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
          SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
          SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
        },
        { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
          SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
          0, 0
        },
        { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
          SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
          SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
        },
        { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
          SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
          SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
        },
        { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
          SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
          SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
        },
        { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
          SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
          SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
        },
        { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
          SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
          SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
        },
        { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
          SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
          SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
        },
        { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
          SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
          SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
        },
        { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
          SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
          SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
        },
        { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
          SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
          SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
        },
        { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
          SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
          SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
        },
        { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
          SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
          SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
        },
        { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
          SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
          SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
        },
        { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
          SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
          SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
        },
        { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
          SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
          SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
        },
        { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
          SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
          SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
        },
        { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
          SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
          SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
        },
        { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
          SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
          SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
        },
        { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
          SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
          0, 0
        },
        { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
          SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
          0, 0
        },
        { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
          SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
          0, 0
        },
        { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
          SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
          0, 0
        },
        { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
          SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
          0, 0
        },
        { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
          SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
          SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
        },
        { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
          SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
          SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
        },
        { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
          SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
          SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
        },
        { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
          SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
          SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
        },
        { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
          SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
          SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
        },
        { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
          SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
          0, 0
        },
        { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
          SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
          0, 0
        },
        { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
          SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
          0, 0
        },
        { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
          SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
          0, 0
        },
        { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
          SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
          0, 0
        },
        { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
        },
        { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
        },
        { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
        },
        { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
        },
        { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
        },
        { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
          0, 0
        },
        { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
          0, 0
        },
        { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
          0, 0
        },
        { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
          0, 0
        },
        { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
          0, 0
        },
        { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
          SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
        },
        { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
          SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
        },
        { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
          SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
          SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
        },
        { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
          SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
          0, 0
        },
        { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
          SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
          0, 0
        },
        { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
          SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
          0, 0
        },
        { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
          SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
          0, 0
        },
        { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
          SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
          0, 0
        },
        { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
          SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
          0, 0
        }
};

static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
                                     void *inject_if)
{
        struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
        int ret;
        struct ta_ras_trigger_error_input block_info = { 0 };

        if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
                return -EINVAL;

        if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
                return -EINVAL;

        if (!ras_gfx_subblocks[info->head.sub_block_index].name)
                return -EPERM;

        if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
              info->head.type)) {
                DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
                        ras_gfx_subblocks[info->head.sub_block_index].name,
                        info->head.type);
                return -EPERM;
        }

        if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
              info->head.type)) {
                DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
                        ras_gfx_subblocks[info->head.sub_block_index].name,
                        info->head.type);
                return -EPERM;
        }

        block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
        block_info.sub_block_index =
                ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
        block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
        block_info.address = info->address;
        block_info.value = info->value;

        mutex_lock(&adev->grbm_idx_mutex);
        ret = psp_ras_trigger_error(&adev->psp, &block_info);
        mutex_unlock(&adev->grbm_idx_mutex);

        return ret;
}

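/*
 * Names for the indexed UTC memory instances below.  Each table parallels
 * an *_ECC_INDEX/*_EDC_INDEX register: writing index i selects instance i,
 * and the matching *_CNT register then reports that instance's error counts.
 */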
static const char *vml2_mems[] = {
        "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
        "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
        "UTC_VML2_BANK_CACHE_0_4K_MEM0",
        "UTC_VML2_BANK_CACHE_0_4K_MEM1",
        "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
        "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
        "UTC_VML2_BANK_CACHE_1_4K_MEM0",
        "UTC_VML2_BANK_CACHE_1_4K_MEM1",
        "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
        "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
        "UTC_VML2_BANK_CACHE_2_4K_MEM0",
        "UTC_VML2_BANK_CACHE_2_4K_MEM1",
        "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
        "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
        "UTC_VML2_BANK_CACHE_3_4K_MEM0",
        "UTC_VML2_BANK_CACHE_3_4K_MEM1",
};

static const char *vml2_walker_mems[] = {
        "UTC_VML2_CACHE_PDE0_MEM0",
        "UTC_VML2_CACHE_PDE0_MEM1",
        "UTC_VML2_CACHE_PDE1_MEM0",
        "UTC_VML2_CACHE_PDE1_MEM1",
        "UTC_VML2_CACHE_PDE2_MEM0",
        "UTC_VML2_CACHE_PDE2_MEM1",
        "UTC_VML2_RDIF_LOG_FIFO",
};

static const char *atc_l2_cache_2m_mems[] = {
        "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
        "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
        "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
        "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
};

static const char *atc_l2_cache_4k_mems[] = {
        "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
        "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
        "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
        "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
        "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
        "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
        "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
        "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
        "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
        "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
        "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
        "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
        "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
        "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
        "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
        "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
        "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
        "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
        "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
        "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
        "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
        "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
        "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
        "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
        "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
        "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
        "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
        "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
        "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
        "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
        "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
        "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
};

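/*
 * Query the EDC/ECC status of the UTC memories (VML2, the VML2 walker and
 * the ATC L2 2M/4K caches).  The counters are first cleared, then each
 * instance is selected through its INDEX register and its SEC/DED counts
 * are read out and accumulated into err_data; the INDEX registers are
 * restored to 255 when done.
 */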
static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
                                         struct ras_err_data *err_data)
{
        uint32_t i, data;
        uint32_t sec_count, ded_count;

        WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
        WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
        WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
        WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
        WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
        WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
        WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
        WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);

        for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
                WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
                data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);

                sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
                if (sec_count) {
                        dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
                                "SEC %d\n", i, vml2_mems[i], sec_count);
                        err_data->ce_count += sec_count;
                }

                ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
                if (ded_count) {
                        dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
                                "DED %d\n", i, vml2_mems[i], ded_count);
                        err_data->ue_count += ded_count;
                }
        }

        for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
                WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
                data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);

                sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
                                                SEC_COUNT);
                if (sec_count) {
                        dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
                                "SEC %d\n", i, vml2_walker_mems[i], sec_count);
                        err_data->ce_count += sec_count;
                }

                ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
                                                DED_COUNT);
                if (ded_count) {
                        dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
                                "DED %d\n", i, vml2_walker_mems[i], ded_count);
                        err_data->ue_count += ded_count;
                }
        }

        for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
                WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
                data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);

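                /* SEC_COUNT lives in bits 14:13; the mask is open-coded
                 * here, presumably because no SH_MASK definition is
                 * available for this register. */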
                sec_count = (data & 0x00006000L) >> 0xd;
                if (sec_count) {
                        dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
                                "SEC %d\n", i, atc_l2_cache_2m_mems[i],
                                sec_count);
                        err_data->ce_count += sec_count;
                }
        }

        for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
                WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
                data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);

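                /* SEC_COUNT in bits 14:13, the same open-coded layout as
                 * the 2M cache above. */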
                sec_count = (data & 0x00006000L) >> 0xd;
                if (sec_count) {
                        dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
                                "SEC %d\n", i, atc_l2_cache_4k_mems[i],
                                sec_count);
                        err_data->ce_count += sec_count;
                }

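                /* DED_COUNT lives in bits 16:15. */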
                ded_count = (data & 0x00018000L) >> 0xf;
                if (ded_count) {
                        dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
                                "DED %d\n", i, atc_l2_cache_4k_mems[i],
                                ded_count);
                        err_data->ue_count += ded_count;
                }
        }

        WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
        WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
        WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
        WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);

        return 0;
}

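/*
 * Decode one raw EDC counter value.  Every entry of gfx_v9_0_ras_fields
 * that refers to the same register as @reg contributes: its SEC/DED mask
 * and shift extract the per-sub-block counts from @value, which are then
 * logged and added to the running totals.
 */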
static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
        const struct soc15_reg_entry *reg,
        uint32_t se_id, uint32_t inst_id, uint32_t value,
        uint32_t *sec_count, uint32_t *ded_count)
{
        uint32_t i;
        uint32_t sec_cnt, ded_cnt;

        for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
                if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
                        gfx_v9_0_ras_fields[i].seg != reg->seg ||
                        gfx_v9_0_ras_fields[i].inst != reg->inst)
                        continue;

                sec_cnt = (value &
                                gfx_v9_0_ras_fields[i].sec_count_mask) >>
                                gfx_v9_0_ras_fields[i].sec_count_shift;
                if (sec_cnt) {
                        dev_info(adev->dev, "GFX SubBlock %s, "
                                "Instance[%d][%d], SEC %d\n",
                                gfx_v9_0_ras_fields[i].name,
                                se_id, inst_id,
                                sec_cnt);
                        *sec_count += sec_cnt;
                }

                ded_cnt = (value &
                                gfx_v9_0_ras_fields[i].ded_count_mask) >>
                                gfx_v9_0_ras_fields[i].ded_count_shift;
                if (ded_cnt) {
                        dev_info(adev->dev, "GFX SubBlock %s, "
                                "Instance[%d][%d], DED %d\n",
                                gfx_v9_0_ras_fields[i].name,
                                se_id, inst_id,
                                ded_cnt);
                        *ded_count += ded_cnt;
                }
        }

        return 0;
}

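/*
 * The EDC counters clear when read, so resetting simply walks every
 * counter register (across all SE/instance combinations) and every
 * indexed UTC memory and performs a dummy read of each one.
 */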
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
{
        int i, j, k;

        if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
                return;

        /* read back registers to clear the counters */
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
                for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
                        for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
                                gfx_v9_0_select_se_sh(adev, j, 0x0, k);
                                RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
                        }
                }
        }
        WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
        mutex_unlock(&adev->grbm_idx_mutex);

        WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
        WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
        WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
        WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
        WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
        WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
        WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
        WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);

        for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
                WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
                RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
        }

        for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
                WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
                RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
        }

        for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
                WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
                RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
        }

        for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
                WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
                RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
        }

        WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
        WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
        WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
        WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
}

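/*
 * Top-level RAS error count query for GFX.  Walks every EDC counter
 * register for every SE/instance, decodes non-zero values through
 * gfx_v9_0_ras_error_count(), folds the totals into err_data and finally
 * adds the UTC counters via gfx_v9_0_query_utc_edc_status().
 */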
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
                                          void *ras_error_status)
{
        struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
        uint32_t sec_count = 0, ded_count = 0;
        uint32_t i, j, k;
        uint32_t reg_value;

        if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
                return -EINVAL;

        err_data->ue_count = 0;
        err_data->ce_count = 0;

        mutex_lock(&adev->grbm_idx_mutex);

        for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
                for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
                        for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
                                gfx_v9_0_select_se_sh(adev, j, 0, k);
                                reg_value =
                                        RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
                                if (reg_value)
                                        gfx_v9_0_ras_error_count(adev,
                                                &gfx_v9_0_edc_counter_regs[i],
                                                j, k, reg_value,
                                                &sec_count, &ded_count);
                        }
                }
        }

        err_data->ce_count += sec_count;
        err_data->ue_count += ded_count;

        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        gfx_v9_0_query_utc_edc_status(adev, err_data);

        return 0;
}

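/*
 * Emit a full cache flush/invalidate via ACQUIRE_MEM.  The packet is
 * 7 dwords (header plus 6 payload dwords), which is what the rings'
 * emit_frame_size accounting below budgets for it.
 */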
static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
{
        const unsigned int cp_coher_cntl =
                        PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
                        PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
                        PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
                        PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
                        PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);

        /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
        amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
        amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
        amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
        amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
        amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
        amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
        amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
}

static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
        .name = "gfx_v9_0",
        .early_init = gfx_v9_0_early_init,
        .late_init = gfx_v9_0_late_init,
        .sw_init = gfx_v9_0_sw_init,
        .sw_fini = gfx_v9_0_sw_fini,
        .hw_init = gfx_v9_0_hw_init,
        .hw_fini = gfx_v9_0_hw_fini,
        .suspend = gfx_v9_0_suspend,
        .resume = gfx_v9_0_resume,
        .is_idle = gfx_v9_0_is_idle,
        .wait_for_idle = gfx_v9_0_wait_for_idle,
        .soft_reset = gfx_v9_0_soft_reset,
        .set_clockgating_state = gfx_v9_0_set_clockgating_state,
        .set_powergating_state = gfx_v9_0_set_powergating_state,
        .get_clockgating_state = gfx_v9_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
        .type = AMDGPU_RING_TYPE_GFX,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = true,
        .vmhub = AMDGPU_GFXHUB_0,
        .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
        .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
        .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
        .emit_frame_size = /* 242 dwords at most, assuming 16 IBs */
                5 +  /* COND_EXEC */
                7 +  /* PIPELINE_SYNC */
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
                2 + /* VM_FLUSH */
                8 +  /* FENCE for VM_FLUSH */
                20 + /* GDS switch */
                4 + /* double SWITCH_BUFFER,
                       the first COND_EXEC jumps to the place just
                       prior to this double SWITCH_BUFFER */
                5 + /* COND_EXEC */
                7 + /* HDP_flush */
                4 + /* VGT_flush */
                14 + /* CE_META */
                31 + /* DE_META */
                3 + /* CNTX_CTRL */
                5 + /* HDP_INVL */
                8 + 8 + /* FENCE x2 */
                2 + /* SWITCH_BUFFER */
                7, /* gfx_v9_0_emit_mem_sync */
        .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
        .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
        .emit_fence = gfx_v9_0_ring_emit_fence,
        .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
        .test_ring = gfx_v9_0_ring_test_ring,
        .test_ib = gfx_v9_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_switch_buffer = gfx_v9_ring_emit_sb,
        .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
        .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
        .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
        .emit_wreg = gfx_v9_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
        .soft_recovery = gfx_v9_0_ring_soft_recovery,
        .emit_mem_sync = gfx_v9_0_emit_mem_sync,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
        .type = AMDGPU_RING_TYPE_COMPUTE,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = true,
        .vmhub = AMDGPU_GFXHUB_0,
        .get_rptr = gfx_v9_0_ring_get_rptr_compute,
        .get_wptr = gfx_v9_0_ring_get_wptr_compute,
        .set_wptr = gfx_v9_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v9_0_ring_emit_gds_switch */
                7 + /* gfx_v9_0_ring_emit_hdp_flush */
                5 + /* hdp invalidate */
                7 + /* gfx_v9_0_ring_emit_pipeline_sync */
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
                2 + /* gfx_v9_0_ring_emit_vm_flush */
                8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
                7, /* gfx_v9_0_emit_mem_sync */
        .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
        .emit_ib = gfx_v9_0_ring_emit_ib_compute,
        .emit_fence = gfx_v9_0_ring_emit_fence,
        .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
        .test_ring = gfx_v9_0_ring_test_ring,
        .test_ib = gfx_v9_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_wreg = gfx_v9_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
        .emit_mem_sync = gfx_v9_0_emit_mem_sync,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
        .type = AMDGPU_RING_TYPE_KIQ,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = true,
        .vmhub = AMDGPU_GFXHUB_0,
        .get_rptr = gfx_v9_0_ring_get_rptr_compute,
        .get_wptr = gfx_v9_0_ring_get_wptr_compute,
        .set_wptr = gfx_v9_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v9_0_ring_emit_gds_switch */
                7 + /* gfx_v9_0_ring_emit_hdp_flush */
                5 + /* hdp invalidate */
                7 + /* gfx_v9_0_ring_emit_pipeline_sync */
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
                2 + /* gfx_v9_0_ring_emit_vm_flush */
                8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
        .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
        .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
        .test_ring = gfx_v9_0_ring_test_ring,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_rreg = gfx_v9_0_ring_emit_rreg,
        .emit_wreg = gfx_v9_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
                adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

        for (i = 0; i < adev->gfx.num_compute_rings; i++)
                adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
        .set = gfx_v9_0_set_eop_interrupt_state,
        .process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
        .set = gfx_v9_0_set_priv_reg_fault_state,
        .process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
        .set = gfx_v9_0_set_priv_inst_fault_state,
        .process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
        .set = gfx_v9_0_set_cp_ecc_error_state,
        .process = amdgpu_gfx_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
        adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

        adev->gfx.priv_reg_irq.num_types = 1;
        adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

        adev->gfx.priv_inst_irq.num_types = 1;
        adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

        adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
        adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_VEGA10:
        case CHIP_VEGA12:
        case CHIP_VEGA20:
        case CHIP_RAVEN:
        case CHIP_ARCTURUS:
        case CHIP_RENOIR:
                adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
                break;
        default:
                break;
        }
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
        /* init asic gds info */
        switch (adev->asic_type) {
        case CHIP_VEGA10:
        case CHIP_VEGA12:
        case CHIP_VEGA20:
                adev->gds.gds_size = 0x10000;
                break;
        case CHIP_RAVEN:
        case CHIP_ARCTURUS:
                adev->gds.gds_size = 0x1000;
                break;
        default:
                adev->gds.gds_size = 0x10000;
                break;
        }

        switch (adev->asic_type) {
        case CHIP_VEGA10:
        case CHIP_VEGA20:
                adev->gds.gds_compute_max_wave_id = 0x7ff;
                break;
        case CHIP_VEGA12:
                adev->gds.gds_compute_max_wave_id = 0x27f;
                break;
        case CHIP_RAVEN:
                if (adev->apu_flags & AMD_APU_IS_RAVEN2)
                        adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
                else
                        adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
                break;
        case CHIP_ARCTURUS:
                adev->gds.gds_compute_max_wave_id = 0xfff;
                break;
        default:
                /* this really depends on the chip */
                adev->gds.gds_compute_max_wave_id = 0x7ff;
                break;
        }

        adev->gds.gws_size = 64;
        adev->gds.oa_size = 16;
}

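/*
 * Apply a user-requested CU disable bitmap by writing the inactive-CUs
 * field of GC_USER_SHADER_ARRAY_CONFIG for the currently selected SE/SH.
 */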
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
                                                 u32 bitmap)
{
        u32 data;

        if (!bitmap)
                return;

        data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
        data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

        WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

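/*
 * A CU is active when it is disabled neither by fuses (CC_*) nor by the
 * user (GC_USER_*); the OR of the two inactive masks is therefore inverted
 * and clamped to max_cu_per_sh bits.
 */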
static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
        u32 data, mask;

        data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
        data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

        data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
        data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

        mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

        return (~data) & mask;
}

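/*
 * Build the active-CU info for all shader engines.  For each SE/SH pair
 * the per-SH disable mask is applied, the active bitmap is read back, and
 * the ao (always-on) CU mask is accumulated for the first 2x2 SE/SH block.
 */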
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info)
{
        int i, j, k, counter, active_cu_number = 0;
        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
        unsigned disable_masks[4 * 4];

        if (!adev || !cu_info)
                return -EINVAL;

        /*
         * 16 comes from the bitmap array size 4*4, which covers all gfx9 ASICs
         */
        if (adev->gfx.config.max_shader_engines *
                adev->gfx.config.max_sh_per_se > 16)
                return -EINVAL;

        amdgpu_gfx_parse_disable_cu(disable_masks,
                                    adev->gfx.config.max_shader_engines,
                                    adev->gfx.config.max_sh_per_se);

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
                        gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
                        gfx_v9_0_set_user_cu_inactive_bitmap(
                                adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
                        bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

                        /*
                         * The bitmap (and ao_cu_bitmap) in the cu_info
                         * structure is a 4x4 array, which suits the Vega
                         * ASICs with their 4*2 SE/SH layout.
                         * For Arcturus, however, the SE/SH layout changed
                         * to 8*1.  To minimize the impact, we fold it into
                         * the current bitmap array as below:
                         *    SE4,SH0 --> bitmap[0][1]
                         *    SE5,SH0 --> bitmap[1][1]
                         *    SE6,SH0 --> bitmap[2][1]
                         *    SE7,SH0 --> bitmap[3][1]
                         */
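                        /* e.g. SE5/SH0 on Arcturus: i = 5, j = 0, so the
                         * entry lands in bitmap[5 % 4][0 + 5 / 4], i.e.
                         * bitmap[1][1], matching the table above. */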
                        cu_info->bitmap[i % 4][j + i / 4] = bitmap;

                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
                                if (bitmap & mask) {
                                        if (counter < adev->gfx.config.max_cu_per_sh)
                                                ao_bitmap |= mask;
                                        counter++;
                                }
                                mask <<= 1;
                        }
                        active_cu_number += counter;
                        if (i < 2 && j < 2)
                                ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
                        cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
                }
        }
        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        cu_info->number = active_cu_number;
        cu_info->ao_cu_mask = ao_cu_mask;
        cu_info->simd_per_cu = NUM_SIMD_PER_CU;

        return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 9,
        .minor = 0,
        .rev = 0,
        .funcs = &gfx_v9_0_ip_funcs,
};