drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "gfx_v9_4.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

#define mmTCP_CHAN_STEER_0_ARCT				0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX		0
#define mmTCP_CHAN_STEER_1_ARCT				0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX		0
#define mmTCP_CHAN_STEER_2_ARCT				0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX		0
#define mmTCP_CHAN_STEER_3_ARCT				0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX		0
#define mmTCP_CHAN_STEER_4_ARCT				0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX		0
#define mmTCP_CHAN_STEER_5_ARCT				0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX		0

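/*
 * GFX sub-blocks the RAS TA can report or inject errors for.  The values,
 * including the *_INDEX_START/_END range markers, mirror the RAS TA's own
 * sub-block enumeration and are assumed to stay in sync with it.
 */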
enum ta_ras_gfx_subblock {
	/*CPC*/
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF*/
	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG*/
	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS*/
	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI*/
	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges)*/
	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0*/
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

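/*
 * Build one ras_gfx_subblocks[] entry: the entry name and TA sub-block id are
 * derived from the token, while the eight flags are packed into the
 * hw_supported_error_type (a..d, bits 0..3) and sw_supported_error_type
 * (e..h) bitmasks.  The meaning of each individual flag follows the RAS TA
 * interface, so the packing here is descriptive rather than authoritative.
 */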
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
		#subblock,                                                     \
		TA_RAS_BLOCK__##subblock,                                      \
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
	}

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

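/*
 * "Golden" register settings: (hwip, instance, register, mask, value) tuples
 * applied by gfx_v9_0_init_golden_registers() through
 * soc15_program_register_sequence(), essentially a read-modify-write of the
 * masked bits (or a straight write when the mask covers the whole register).
 * A per-ASIC table is layered on top of the generation-wide defaults.
 */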
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);

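/*
 * Issue a SET_RESOURCES packet on the KIQ ring, telling the KIQ which
 * compute queues (queue_mask) it manages; GWS/OAC/GDS resources are left at
 * zero here.
 */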
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
				uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
		PACKET3_SET_RESOURCES_VMID_MASK(0) |
		/* vmid_mask:0 queue_type:0 (KIQ) */
		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

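/*
 * Ask the KIQ to map a ring onto its hardware queue slot: the MAP_QUEUES
 * packet carries the ring's me/pipe/queue, doorbell offset, MQD address and
 * wptr writeback address.
 */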
static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			 /*queue_type: normal compute queue */
			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			 /* alloc format: all_on_one_pipe */
			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			 /* num_queues: must be 1 */
			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

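/*
 * Ask the KIQ to unmap (or preempt) a queue.  For PREEMPT_QUEUES_NO_UNMAP the
 * trailing dwords carry the address and sequence value used to signal that
 * the preemption has completed; otherwise they are zero.
 */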
static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

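/*
 * Query a queue's status through the KIQ; the QUERY_STATUS packet identifies
 * the queue by doorbell offset and asks the KIQ to write 'seq' to 'addr'
 * when the query completes.
 */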
static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

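/*
 * Flush the GPU TLBs for the given PASID through the KIQ, covering all VM
 * hubs when 'all_hub' is set.
 */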
static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				uint16_t pasid, uint32_t flush_type,
				bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
}

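/*
 * Program the golden register tables above: the generation-wide set first,
 * then the ASIC-specific overrides, and finally the common gc_9_x settings
 * (skipped for Renoir and Arcturus).
 */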
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_ARCTURUS:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case CHIP_RENOIR:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need common goldensetting */
	default:
		break;
	}

	if (adev->asic_type != CHIP_ARCTURUS)
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

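/*
 * Emit a WRITE_DATA packet that writes 'val' to register 'reg' from the
 * given engine, optionally requesting a write confirmation.
 */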
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

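/*
 * Emit a WAIT_REG_MEM packet that polls a register (mem_space == 0) or a
 * memory location (mem_space == 1) until (value & mask) equals 'ref',
 * re-checking at the given poll interval.
 */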
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

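/*
 * Basic ring test: write 0xDEADBEEF to a scratch register through the ring
 * and poll until it shows up, proving the ring can fetch and execute packets.
 */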
972 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
973 {
974         struct amdgpu_device *adev = ring->adev;
975         uint32_t scratch;
976         uint32_t tmp = 0;
977         unsigned i;
978         int r;
979
980         r = amdgpu_gfx_scratch_get(adev, &scratch);
981         if (r)
982                 return r;
983
984         WREG32(scratch, 0xCAFEDEAD);
985         r = amdgpu_ring_alloc(ring, 3);
986         if (r)
987                 goto error_free_scratch;
988
989         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
990         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
991         amdgpu_ring_write(ring, 0xDEADBEEF);
992         amdgpu_ring_commit(ring);
993
994         for (i = 0; i < adev->usec_timeout; i++) {
995                 tmp = RREG32(scratch);
996                 if (tmp == 0xDEADBEEF)
997                         break;
998                 udelay(1);
999         }
1000
1001         if (i >= adev->usec_timeout)
1002                 r = -ETIMEDOUT;
1003
1004 error_free_scratch:
1005         amdgpu_gfx_scratch_free(adev, scratch);
1006         return r;
1007 }
1008
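/* IB test: submit a small indirect buffer that writes 0xDEADBEEF to a
 * writeback slot and check the value once the fence signals.
 */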
1009 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1010 {
1011         struct amdgpu_device *adev = ring->adev;
1012         struct amdgpu_ib ib;
1013         struct dma_fence *f = NULL;
1014
1015         unsigned index;
1016         uint64_t gpu_addr;
1017         uint32_t tmp;
1018         long r;
1019
1020         r = amdgpu_device_wb_get(adev, &index);
1021         if (r)
1022                 return r;
1023
1024         gpu_addr = adev->wb.gpu_addr + (index * 4);
1025         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1026         memset(&ib, 0, sizeof(ib));
1027         r = amdgpu_ib_get(adev, NULL, 16, &ib);
1028         if (r)
1029                 goto err1;
1030
1031         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1032         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1033         ib.ptr[2] = lower_32_bits(gpu_addr);
1034         ib.ptr[3] = upper_32_bits(gpu_addr);
1035         ib.ptr[4] = 0xDEADBEEF;
1036         ib.length_dw = 5;
1037
1038         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1039         if (r)
1040                 goto err2;
1041
1042         r = dma_fence_wait_timeout(f, false, timeout);
1043         if (r == 0) {
1044                 r = -ETIMEDOUT;
1045                 goto err2;
1046         } else if (r < 0) {
1047                 goto err2;
1048         }
1049
1050         tmp = adev->wb.wb[index];
1051         if (tmp == 0xDEADBEEF)
1052                 r = 0;
1053         else
1054                 r = -EINVAL;
1055
1056 err2:
1057         amdgpu_ib_free(adev, &ib, NULL);
1058         dma_fence_put(f);
1059 err1:
1060         amdgpu_device_wb_free(adev, index);
1061         return r;
1062 }
1063
1064
1065 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1066 {
1067         release_firmware(adev->gfx.pfp_fw);
1068         adev->gfx.pfp_fw = NULL;
1069         release_firmware(adev->gfx.me_fw);
1070         adev->gfx.me_fw = NULL;
1071         release_firmware(adev->gfx.ce_fw);
1072         adev->gfx.ce_fw = NULL;
1073         release_firmware(adev->gfx.rlc_fw);
1074         adev->gfx.rlc_fw = NULL;
1075         release_firmware(adev->gfx.mec_fw);
1076         adev->gfx.mec_fw = NULL;
1077         release_firmware(adev->gfx.mec2_fw);
1078         adev->gfx.mec2_fw = NULL;
1079
1080         kfree(adev->gfx.rlc.register_list_format);
1081 }
1082
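/* Parse the v2.1 RLC firmware header and record the save/restore list
 * (cntl, gpm, srm) versions, sizes and payload pointers.
 */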
1083 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1084 {
1085         const struct rlc_firmware_header_v2_1 *rlc_hdr;
1086
1087         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1088         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1089         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1090         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1091         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1092         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1093         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1094         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1095         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1096         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1097         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1098         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1099         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1100         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1101                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1102 }
1103
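/* Check per-ASIC CP firmware versions to decide whether ME and MEC support
 * the combined register write-then-wait operation.
 */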
1104 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1105 {
1106         adev->gfx.me_fw_write_wait = false;
1107         adev->gfx.mec_fw_write_wait = false;
1108
1109         if ((adev->asic_type != CHIP_ARCTURUS) &&
1110             ((adev->gfx.mec_fw_version < 0x000001a5) ||
1111             (adev->gfx.mec_feature_version < 46) ||
1112             (adev->gfx.pfp_fw_version < 0x000000b7) ||
1113             (adev->gfx.pfp_feature_version < 46)))
1114                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1115
1116         switch (adev->asic_type) {
1117         case CHIP_VEGA10:
1118                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1119                     (adev->gfx.me_feature_version >= 42) &&
1120                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1121                     (adev->gfx.pfp_feature_version >= 42))
1122                         adev->gfx.me_fw_write_wait = true;
1123
1124                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1125                     (adev->gfx.mec_feature_version >= 42))
1126                         adev->gfx.mec_fw_write_wait = true;
1127                 break;
1128         case CHIP_VEGA12:
1129                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1130                     (adev->gfx.me_feature_version >= 44) &&
1131                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1132                     (adev->gfx.pfp_feature_version >= 44))
1133                         adev->gfx.me_fw_write_wait = true;
1134
1135                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1136                     (adev->gfx.mec_feature_version >= 44))
1137                         adev->gfx.mec_fw_write_wait = true;
1138                 break;
1139         case CHIP_VEGA20:
1140                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1141                     (adev->gfx.me_feature_version >= 44) &&
1142                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1143                     (adev->gfx.pfp_feature_version >= 44))
1144                         adev->gfx.me_fw_write_wait = true;
1145
1146                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1147                     (adev->gfx.mec_feature_version >= 44))
1148                         adev->gfx.mec_fw_write_wait = true;
1149                 break;
1150         case CHIP_RAVEN:
1151                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1152                     (adev->gfx.me_feature_version >= 42) &&
1153                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1154                     (adev->gfx.pfp_feature_version >= 42))
1155                         adev->gfx.me_fw_write_wait = true;
1156
1157                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1158                     (adev->gfx.mec_feature_version >= 42))
1159                         adev->gfx.mec_fw_write_wait = true;
1160                 break;
1161         default:
1162                 break;
1163         }
1164 }
1165
1166 struct amdgpu_gfxoff_quirk {
1167         u16 chip_vendor;
1168         u16 chip_device;
1169         u16 subsys_vendor;
1170         u16 subsys_device;
1171         u8 revision;
1172 };
1173
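/* Boards on which GFXOFF must stay disabled; matched on PCI vendor, device,
 * subsystem IDs and revision.
 */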
1174 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1175         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1176         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1177         { 0, 0, 0, 0, 0 },
1178 };
1179
1180 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1181 {
1182         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1183
1184         while (p && p->chip_device != 0) {
1185                 if (pdev->vendor == p->chip_vendor &&
1186                     pdev->device == p->chip_device &&
1187                     pdev->subsystem_vendor == p->subsys_vendor &&
1188                     pdev->subsystem_device == p->subsys_device &&
1189                     pdev->revision == p->revision) {
1190                         return true;
1191                 }
1192                 ++p;
1193         }
1194         return false;
1195 }
1196
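/* Treat Raven as a "kicker" part when the SMC firmware version is at least
 * 0x41e2b.
 */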
1197 static bool is_raven_kicker(struct amdgpu_device *adev)
1198 {
1199         if (adev->pm.fw_version >= 0x41e2b)
1200                 return true;
1201         else
1202                 return false;
1203 }
1204
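/* Apply board quirks and per-ASIC firmware checks to decide whether GFXOFF
 * and the related GFX powergating flags stay enabled.
 */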
1205 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1206 {
1207         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1208                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1209
1210         switch (adev->asic_type) {
1211         case CHIP_VEGA10:
1212         case CHIP_VEGA12:
1213         case CHIP_VEGA20:
1214                 break;
1215         case CHIP_RAVEN:
1216                 if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) &&
1217                     ((!is_raven_kicker(adev) &&
1218                       adev->gfx.rlc_fw_version < 531) ||
1219                      (adev->gfx.rlc_feature_version < 1) ||
1220                      !adev->gfx.rlc.is_rlc_v2_1))
1221                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1222
1223                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1224                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1225                                 AMD_PG_SUPPORT_CP |
1226                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1227                 break;
1228         case CHIP_RENOIR:
1229                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1230                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1231                                 AMD_PG_SUPPORT_CP |
1232                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1233                 break;
1234         default:
1235                 break;
1236         }
1237 }
1238
1239 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1240                                           const char *chip_name)
1241 {
1242         char fw_name[30];
1243         int err;
1244         struct amdgpu_firmware_info *info = NULL;
1245         const struct common_firmware_header *header = NULL;
1246         const struct gfx_firmware_header_v1_0 *cp_hdr;
1247
1248         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1249         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1250         if (err)
1251                 goto out;
1252         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1253         if (err)
1254                 goto out;
1255         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1256         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1257         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1258
1259         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1260         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1261         if (err)
1262                 goto out;
1263         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1264         if (err)
1265                 goto out;
1266         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1267         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1268         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1269
1270         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1271         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1272         if (err)
1273                 goto out;
1274         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1275         if (err)
1276                 goto out;
1277         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1278         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1279         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1280
1281         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1282                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1283                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1284                 info->fw = adev->gfx.pfp_fw;
1285                 header = (const struct common_firmware_header *)info->fw->data;
1286                 adev->firmware.fw_size +=
1287                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1288
1289                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1290                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1291                 info->fw = adev->gfx.me_fw;
1292                 header = (const struct common_firmware_header *)info->fw->data;
1293                 adev->firmware.fw_size +=
1294                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1295
1296                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1297                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1298                 info->fw = adev->gfx.ce_fw;
1299                 header = (const struct common_firmware_header *)info->fw->data;
1300                 adev->firmware.fw_size +=
1301                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1302         }
1303
1304 out:
1305         if (err) {
1306                 dev_err(adev->dev,
1307                         "gfx9: Failed to load firmware \"%s\"\n",
1308                         fw_name);
1309                 release_firmware(adev->gfx.pfp_fw);
1310                 adev->gfx.pfp_fw = NULL;
1311                 release_firmware(adev->gfx.me_fw);
1312                 adev->gfx.me_fw = NULL;
1313                 release_firmware(adev->gfx.ce_fw);
1314                 adev->gfx.ce_fw = NULL;
1315         }
1316         return err;
1317 }
1318
1319 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1320                                           const char *chip_name)
1321 {
1322         char fw_name[30];
1323         int err;
1324         struct amdgpu_firmware_info *info = NULL;
1325         const struct common_firmware_header *header = NULL;
1326         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1327         unsigned int *tmp = NULL;
1328         unsigned int i = 0;
1329         uint16_t version_major;
1330         uint16_t version_minor;
1331         uint32_t smu_version;
1332
1333         /*
1334          * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1335          * instead of picasso_rlc.bin.
1336          * Judgment method:
1337          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1338          *          or revision >= 0xD8 && revision <= 0xDF
1339          * otherwise is PCO FP5
1340          * otherwise it is PCO FP5
1341         if (!strcmp(chip_name, "picasso") &&
1342                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1343                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1344                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1345         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1346                 (smu_version >= 0x41e2b))
1347                 /*
1348                  * SMC is loaded by SBIOS on APU and the driver can get the SMU version directly.
1349                  */
1350                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1351         else
1352                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1353         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1354         if (err)
1355                 goto out;
1356         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
             if (err)
                     goto out;
1357         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1358
1359         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1360         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1361         if (version_major == 2 && version_minor == 1)
1362                 adev->gfx.rlc.is_rlc_v2_1 = true;
1363
1364         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1365         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1366         adev->gfx.rlc.save_and_restore_offset =
1367                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1368         adev->gfx.rlc.clear_state_descriptor_offset =
1369                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1370         adev->gfx.rlc.avail_scratch_ram_locations =
1371                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1372         adev->gfx.rlc.reg_restore_list_size =
1373                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1374         adev->gfx.rlc.reg_list_format_start =
1375                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1376         adev->gfx.rlc.reg_list_format_separate_start =
1377                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1378         adev->gfx.rlc.starting_offsets_start =
1379                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1380         adev->gfx.rlc.reg_list_format_size_bytes =
1381                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1382         adev->gfx.rlc.reg_list_size_bytes =
1383                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1384         adev->gfx.rlc.register_list_format =
1385                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1386                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1387         if (!adev->gfx.rlc.register_list_format) {
1388                 err = -ENOMEM;
1389                 goto out;
1390         }
1391
1392         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1393                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1394         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1395                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1396
1397         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1398
1399         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1400                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1401         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1402                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1403
1404         if (adev->gfx.rlc.is_rlc_v2_1)
1405                 gfx_v9_0_init_rlc_ext_microcode(adev);
1406
1407         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1408                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1409                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1410                 info->fw = adev->gfx.rlc_fw;
1411                 header = (const struct common_firmware_header *)info->fw->data;
1412                 adev->firmware.fw_size +=
1413                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1414
1415                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1416                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1417                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1418                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1419                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1420                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1421                         info->fw = adev->gfx.rlc_fw;
1422                         adev->firmware.fw_size +=
1423                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1424
1425                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1426                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1427                         info->fw = adev->gfx.rlc_fw;
1428                         adev->firmware.fw_size +=
1429                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1430
1431                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1432                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1433                         info->fw = adev->gfx.rlc_fw;
1434                         adev->firmware.fw_size +=
1435                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1436                 }
1437         }
1438
1439 out:
1440         if (err) {
1441                 dev_err(adev->dev,
1442                         "gfx9: Failed to load firmware \"%s\"\n",
1443                         fw_name);
1444                 release_firmware(adev->gfx.rlc_fw);
1445                 adev->gfx.rlc_fw = NULL;
1446         }
1447         return err;
1448 }
1449
1450 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1451                                           const char *chip_name)
1452 {
1453         char fw_name[30];
1454         int err;
1455         struct amdgpu_firmware_info *info = NULL;
1456         const struct common_firmware_header *header = NULL;
1457         const struct gfx_firmware_header_v1_0 *cp_hdr;
1458
1459         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1460         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1461         if (err)
1462                 goto out;
1463         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1464         if (err)
1465                 goto out;
1466         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1467         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1468         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1469
1470
1471         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1472         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1473         if (!err) {
1474                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1475                 if (err)
1476                         goto out;
1477                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1478                         adev->gfx.mec2_fw->data;
1479                 adev->gfx.mec2_fw_version =
1480                         le32_to_cpu(cp_hdr->header.ucode_version);
1481                 adev->gfx.mec2_feature_version =
1482                         le32_to_cpu(cp_hdr->ucode_feature_version);
1483         } else {
1484                 err = 0;
1485                 adev->gfx.mec2_fw = NULL;
1486         }
1487
1488         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1489                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1490                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1491                 info->fw = adev->gfx.mec_fw;
1492                 header = (const struct common_firmware_header *)info->fw->data;
1493                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1494                 adev->firmware.fw_size +=
1495                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1496
1497                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1498                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1499                 info->fw = adev->gfx.mec_fw;
1500                 adev->firmware.fw_size +=
1501                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1502
1503                 if (adev->gfx.mec2_fw) {
1504                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1505                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1506                         info->fw = adev->gfx.mec2_fw;
1507                         header = (const struct common_firmware_header *)info->fw->data;
1508                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1509                         adev->firmware.fw_size +=
1510                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1511
1512                         /* TODO: Determine if MEC2 JT FW loading can be
1513                          * removed for all GFX V9 ASICs and above. */
1514                         if (adev->asic_type != CHIP_ARCTURUS &&
1515                             adev->asic_type != CHIP_RENOIR) {
1516                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1517                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1518                                 info->fw = adev->gfx.mec2_fw;
1519                                 adev->firmware.fw_size +=
1520                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1521                                         PAGE_SIZE);
1522                         }
1523                 }
1524         }
1525
1526 out:
1527         gfx_v9_0_check_if_need_gfxoff(adev);
1528         gfx_v9_0_check_fw_write_wait(adev);
1529         if (err) {
1530                 dev_err(adev->dev,
1531                         "gfx9: Failed to load firmware \"%s\"\n",
1532                         fw_name);
1533                 release_firmware(adev->gfx.mec_fw);
1534                 adev->gfx.mec_fw = NULL;
1535                 release_firmware(adev->gfx.mec2_fw);
1536                 adev->gfx.mec2_fw = NULL;
1537         }
1538         return err;
1539 }
1540
1541 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1542 {
1543         const char *chip_name;
1544         int r;
1545
1546         DRM_DEBUG("\n");
1547
1548         switch (adev->asic_type) {
1549         case CHIP_VEGA10:
1550                 chip_name = "vega10";
1551                 break;
1552         case CHIP_VEGA12:
1553                 chip_name = "vega12";
1554                 break;
1555         case CHIP_VEGA20:
1556                 chip_name = "vega20";
1557                 break;
1558         case CHIP_RAVEN:
1559                 if (adev->rev_id >= 8)
1560                         chip_name = "raven2";
1561                 else if (adev->pdev->device == 0x15d8)
1562                         chip_name = "picasso";
1563                 else
1564                         chip_name = "raven";
1565                 break;
1566         case CHIP_ARCTURUS:
1567                 chip_name = "arcturus";
1568                 break;
1569         case CHIP_RENOIR:
1570                 chip_name = "renoir";
1571                 break;
1572         default:
1573                 BUG();
1574         }
1575
1576         /* No CPG in Arcturus */
1577         if (adev->asic_type != CHIP_ARCTURUS) {
1578                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1579                 if (r)
1580                         return r;
1581         }
1582
1583         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1584         if (r)
1585                 return r;
1586
1587         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1588         if (r)
1589                 return r;
1590
1591         return r;
1592 }
1593
1594 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1595 {
1596         u32 count = 0;
1597         const struct cs_section_def *sect = NULL;
1598         const struct cs_extent_def *ext = NULL;
1599
1600         /* begin clear state */
1601         count += 2;
1602         /* context control state */
1603         count += 3;
1604
1605         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1606                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1607                         if (sect->id == SECT_CONTEXT)
1608                                 count += 2 + ext->reg_count;
1609                         else
1610                                 return 0;
1611                 }
1612         }
1613
1614         /* end clear state */
1615         count += 2;
1616         /* clear state */
1617         count += 2;
1618
1619         return count;
1620 }
1621
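/* Fill the clear-state buffer: preamble begin/end, context control and one
 * SET_CONTEXT_REG packet per SECT_CONTEXT extent of the cs_data sections.
 */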
1622 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1623                                     volatile u32 *buffer)
1624 {
1625         u32 count = 0, i;
1626         const struct cs_section_def *sect = NULL;
1627         const struct cs_extent_def *ext = NULL;
1628
1629         if (adev->gfx.rlc.cs_data == NULL)
1630                 return;
1631         if (buffer == NULL)
1632                 return;
1633
1634         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1635         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1636
1637         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1638         buffer[count++] = cpu_to_le32(0x80000000);
1639         buffer[count++] = cpu_to_le32(0x80000000);
1640
1641         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1642                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1643                         if (sect->id == SECT_CONTEXT) {
1644                                 buffer[count++] =
1645                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1646                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1647                                                 PACKET3_SET_CONTEXT_REG_START);
1648                                 for (i = 0; i < ext->reg_count; i++)
1649                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1650                         } else {
1651                                 return;
1652                         }
1653                 }
1654         }
1655
1656         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1657         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1658
1659         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1660         buffer[count++] = cpu_to_le32(0);
1661 }
1662
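/* Program the per-SE/SH always-on CU masks used by RLC load balancing and
 * powergating: 4 CUs stay on for APUs, 8 for Vega12, 12 otherwise.
 */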
1663 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1664 {
1665         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1666         uint32_t pg_always_on_cu_num = 2;
1667         uint32_t always_on_cu_num;
1668         uint32_t i, j, k;
1669         uint32_t mask, cu_bitmap, counter;
1670
1671         if (adev->flags & AMD_IS_APU)
1672                 always_on_cu_num = 4;
1673         else if (adev->asic_type == CHIP_VEGA12)
1674                 always_on_cu_num = 8;
1675         else
1676                 always_on_cu_num = 12;
1677
1678         mutex_lock(&adev->grbm_idx_mutex);
1679         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1680                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1681                         mask = 1;
1682                         cu_bitmap = 0;
1683                         counter = 0;
1684                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1685
1686                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1687                                 if (cu_info->bitmap[i][j] & mask) {
1688                                         if (counter == pg_always_on_cu_num)
1689                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1690                                         if (counter < always_on_cu_num)
1691                                                 cu_bitmap |= mask;
1692                                         else
1693                                                 break;
1694                                         counter++;
1695                                 }
1696                                 mask <<= 1;
1697                         }
1698
1699                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1700                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1701                 }
1702         }
1703         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1704         mutex_unlock(&adev->grbm_idx_mutex);
1705 }
1706
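/* Program the RLC load-balance-per-wave (LBPW) thresholds and counters for
 * Raven, then set up the always-on CU masks.
 */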
1707 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1708 {
1709         uint32_t data;
1710
1711         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1712         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1713         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1714         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1715         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1716
1717         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1718         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1719
1720         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1721         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1722
1723         mutex_lock(&adev->grbm_idx_mutex);
1724         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1725         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1726         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1727
1728         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1729         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1730         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1731         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1732         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1733
1734         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1735         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1736         data &= 0x0000FFFF;
1737         data |= 0x00C00000;
1738         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1739
1740         /*
1741          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1742          * programmed in gfx_v9_0_init_always_on_cu_mask()
1743          */
1744
1745         /* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1746          * but used for RLC_LB_CNTL configuration */
1747         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1748         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1749         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1750         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1751         mutex_unlock(&adev->grbm_idx_mutex);
1752
1753         gfx_v9_0_init_always_on_cu_mask(adev);
1754 }
1755
1756 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1757 {
1758         uint32_t data;
1759
1760         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1761         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1762         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1763         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1764         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1765
1766         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1767         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1768
1769         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1770         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1771
1772         mutex_lock(&adev->grbm_idx_mutex);
1773         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1774         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1775         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1776
1777         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1778         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1779         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1780         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1781         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1782
1783         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1784         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1785         data &= 0x0000FFFF;
1786         data |= 0x00C00000;
1787         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1788
1789         /*
1790          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1791          * programmed in gfx_v9_0_init_always_on_cu_mask()
1792          */
1793
1794         /* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1795          * but used for RLC_LB_CNTL configuration */
1796         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1797         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1798         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1799         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1800         mutex_unlock(&adev->grbm_idx_mutex);
1801
1802         gfx_v9_0_init_always_on_cu_mask(adev);
1803 }
1804
1805 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1806 {
1807         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1808 }
1809
1810 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1811 {
1812         return 5;
1813 }
1814
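/* RLC init: build the clear-state buffer, allocate the CP jump table for
 * Raven/Renoir, and do LBPW setup for Raven and Vega20.
 */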
1815 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1816 {
1817         const struct cs_section_def *cs_data;
1818         int r;
1819
1820         adev->gfx.rlc.cs_data = gfx9_cs_data;
1821
1822         cs_data = adev->gfx.rlc.cs_data;
1823
1824         if (cs_data) {
1825                 /* init clear state block */
1826                 r = amdgpu_gfx_rlc_init_csb(adev);
1827                 if (r)
1828                         return r;
1829         }
1830
1831         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1832                 /* TODO: double check the cp_table_size for RV */
1833                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1834                 r = amdgpu_gfx_rlc_init_cpt(adev);
1835                 if (r)
1836                         return r;
1837         }
1838
1839         switch (adev->asic_type) {
1840         case CHIP_RAVEN:
1841                 gfx_v9_0_init_lbpw(adev);
1842                 break;
1843         case CHIP_VEGA20:
1844                 gfx_v9_4_init_lbpw(adev);
1845                 break;
1846         default:
1847                 break;
1848         }
1849
1850         return 0;
1851 }
1852
1853 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1854 {
1855         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1856         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1857 }
1858
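/* Allocate the MEC HPD EOP buffer in VRAM for the acquired compute queues
 * and copy the MEC microcode into a GTT buffer object.
 */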
1859 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1860 {
1861         int r;
1862         u32 *hpd;
1863         const __le32 *fw_data;
1864         unsigned fw_size;
1865         u32 *fw;
1866         size_t mec_hpd_size;
1867
1868         const struct gfx_firmware_header_v1_0 *mec_hdr;
1869
1870         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1871
1872         /* take ownership of the relevant compute queues */
1873         amdgpu_gfx_compute_queue_acquire(adev);
1874         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1875
1876         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1877                                       AMDGPU_GEM_DOMAIN_VRAM,
1878                                       &adev->gfx.mec.hpd_eop_obj,
1879                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1880                                       (void **)&hpd);
1881         if (r) {
1882                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1883                 gfx_v9_0_mec_fini(adev);
1884                 return r;
1885         }
1886
1887         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1888
1889         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1890         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1891
1892         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1893
1894         fw_data = (const __le32 *)
1895                 (adev->gfx.mec_fw->data +
1896                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1897         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1898
1899         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1900                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1901                                       &adev->gfx.mec.mec_fw_obj,
1902                                       &adev->gfx.mec.mec_fw_gpu_addr,
1903                                       (void **)&fw);
1904         if (r) {
1905                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1906                 gfx_v9_0_mec_fini(adev);
1907                 return r;
1908         }
1909
1910         memcpy(fw, fw_data, fw_size);
1911
1912         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1913         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1914
1915         return 0;
1916 }
1917
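/* Read one SQ indirect register of the given SIMD/wave via SQ_IND_INDEX/DATA. */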
1918 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1919 {
1920         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1921                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1922                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1923                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1924                 (SQ_IND_INDEX__FORCE_READ_MASK));
1925         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1926 }
1927
1928 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1929                            uint32_t wave, uint32_t thread,
1930                            uint32_t regno, uint32_t num, uint32_t *out)
1931 {
1932         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1933                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1934                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1935                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1936                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1937                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1938                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1939         while (num--)
1940                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1941 }
1942
1943 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1944 {
1945         /* type 1 wave data */
1946         dst[(*no_fields)++] = 1;
1947         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1948         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1949         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1950         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1951         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1952         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1953         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1954         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1955         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1956         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1957         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1958         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1959         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1960         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1961 }
1962
1963 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1964                                      uint32_t wave, uint32_t start,
1965                                      uint32_t size, uint32_t *dst)
1966 {
1967         wave_read_regs(
1968                 adev, simd, wave, 0,
1969                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1970 }
1971
1972 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1973                                      uint32_t wave, uint32_t thread,
1974                                      uint32_t start, uint32_t size,
1975                                      uint32_t *dst)
1976 {
1977         wave_read_regs(
1978                 adev, simd, wave, thread,
1979                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1980 }
1981
1982 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1983                                   u32 me, u32 pipe, u32 q, u32 vm)
1984 {
1985         soc15_grbm_select(adev, me, pipe, q, vm);
1986 }
1987
1988 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1989         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1990         .select_se_sh = &gfx_v9_0_select_se_sh,
1991         .read_wave_data = &gfx_v9_0_read_wave_data,
1992         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1993         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1994         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1995         .ras_error_inject = &gfx_v9_0_ras_error_inject,
1996         .query_ras_error_count = &gfx_v9_0_query_ras_error_count
1997 };
1998
1999 static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
2000         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2001         .select_se_sh = &gfx_v9_0_select_se_sh,
2002         .read_wave_data = &gfx_v9_0_read_wave_data,
2003         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2004         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2005         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2006         .ras_error_inject = &gfx_v9_4_ras_error_inject,
2007         .query_ras_error_count = &gfx_v9_4_query_ras_error_count
2008 };
2009
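/* Per-ASIC gfx configuration: hw context count, SC FIFO sizes and the golden
 * GB_ADDR_CONFIG value, then decode the gb_addr_config fields.
 */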
2010 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2011 {
2012         u32 gb_addr_config;
2013         int err;
2014
2015         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2016
2017         switch (adev->asic_type) {
2018         case CHIP_VEGA10:
2019                 adev->gfx.config.max_hw_contexts = 8;
2020                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2021                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2022                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2023                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2024                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2025                 break;
2026         case CHIP_VEGA12:
2027                 adev->gfx.config.max_hw_contexts = 8;
2028                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2029                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2030                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2031                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2032                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2033                 DRM_INFO("fix gfx.config for vega12\n");
2034                 break;
2035         case CHIP_VEGA20:
2036                 adev->gfx.config.max_hw_contexts = 8;
2037                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2038                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2039                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2040                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2041                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2042                 gb_addr_config &= ~0xf3e777ff;
2043                 gb_addr_config |= 0x22014042;
2044                 /* check vbios table if gpu info is not available */
2045                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2046                 if (err)
2047                         return err;
2048                 break;
2049         case CHIP_RAVEN:
2050                 adev->gfx.config.max_hw_contexts = 8;
2051                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2052                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2053                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2054                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2055                 if (adev->rev_id >= 8)
2056                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2057                 else
2058                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2059                 break;
2060         case CHIP_ARCTURUS:
2061                 adev->gfx.funcs = &gfx_v9_4_gfx_funcs;
2062                 adev->gfx.config.max_hw_contexts = 8;
2063                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2064                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2065                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2066                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2067                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2068                 gb_addr_config &= ~0xf3e777ff;
2069                 gb_addr_config |= 0x22014042;
2070                 break;
2071         case CHIP_RENOIR:
2072                 adev->gfx.config.max_hw_contexts = 8;
2073                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2074                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2075                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2076                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2077                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2078                 gb_addr_config &= ~0xf3e777ff;
2079                 gb_addr_config |= 0x22010042;
2080                 break;
2081         default:
2082                 BUG();
2083                 break;
2084         }
2085
2086         adev->gfx.config.gb_addr_config = gb_addr_config;
2087
2088         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2089                         REG_GET_FIELD(
2090                                         adev->gfx.config.gb_addr_config,
2091                                         GB_ADDR_CONFIG,
2092                                         NUM_PIPES);
2093
2094         adev->gfx.config.max_tile_pipes =
2095                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2096
2097         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2098                         REG_GET_FIELD(
2099                                         adev->gfx.config.gb_addr_config,
2100                                         GB_ADDR_CONFIG,
2101                                         NUM_BANKS);
2102         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2103                         REG_GET_FIELD(
2104                                         adev->gfx.config.gb_addr_config,
2105                                         GB_ADDR_CONFIG,
2106                                         MAX_COMPRESSED_FRAGS);
2107         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2108                         REG_GET_FIELD(
2109                                         adev->gfx.config.gb_addr_config,
2110                                         GB_ADDR_CONFIG,
2111                                         NUM_RB_PER_SE);
2112         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2113                         REG_GET_FIELD(
2114                                         adev->gfx.config.gb_addr_config,
2115                                         GB_ADDR_CONFIG,
2116                                         NUM_SHADER_ENGINES);
2117         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2118                         REG_GET_FIELD(
2119                                         adev->gfx.config.gb_addr_config,
2120                                         GB_ADDR_CONFIG,
2121                                         PIPE_INTERLEAVE_SIZE));
2122
2123         return 0;
2124 }
2125
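/* Set up one compute ring on the given MEC/pipe/queue: doorbell index, EOP
 * address inside the HPD buffer, ring name and EOP interrupt type.
 */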
2126 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2127                                       int mec, int pipe, int queue)
2128 {
2129         int r;
2130         unsigned irq_type;
2131         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2132
2135         /* mec0 is me1 */
2136         ring->me = mec + 1;
2137         ring->pipe = pipe;
2138         ring->queue = queue;
2139
2140         ring->ring_obj = NULL;
2141         ring->use_doorbell = true;
2142         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2143         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2144                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2145         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2146
2147         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2148                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2149                 + ring->pipe;
2150
2151         /* type-2 packets are deprecated on MEC, use type-3 instead */
2152         r = amdgpu_ring_init(adev, ring, 1024,
2153                              &adev->gfx.eop_irq, irq_type);
2154         if (r)
2155                 return r;
2156
2157
2158         return 0;
2159 }
2160
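/* sw_init: register the CP interrupt sources, load microcode, initialize the
 * RLC, MEC and KIQ objects, and create the gfx and compute rings and MQDs.
 */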
2161 static int gfx_v9_0_sw_init(void *handle)
2162 {
2163         int i, j, k, r, ring_id;
2164         struct amdgpu_ring *ring;
2165         struct amdgpu_kiq *kiq;
2166         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2167
2168         switch (adev->asic_type) {
2169         case CHIP_VEGA10:
2170         case CHIP_VEGA12:
2171         case CHIP_VEGA20:
2172         case CHIP_RAVEN:
2173         case CHIP_ARCTURUS:
2174         case CHIP_RENOIR:
2175                 adev->gfx.mec.num_mec = 2;
2176                 break;
2177         default:
2178                 adev->gfx.mec.num_mec = 1;
2179                 break;
2180         }
2181
2182         adev->gfx.mec.num_pipe_per_mec = 4;
2183         adev->gfx.mec.num_queue_per_pipe = 8;
2184
2185         /* EOP Event */
2186         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2187         if (r)
2188                 return r;
2189
2190         /* Privileged reg */
2191         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2192                               &adev->gfx.priv_reg_irq);
2193         if (r)
2194                 return r;
2195
2196         /* Privileged inst */
2197         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2198                               &adev->gfx.priv_inst_irq);
2199         if (r)
2200                 return r;
2201
2202         /* ECC error */
2203         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2204                               &adev->gfx.cp_ecc_error_irq);
2205         if (r)
2206                 return r;
2207
2208         /* FUE error */
2209         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2210                               &adev->gfx.cp_ecc_error_irq);
2211         if (r)
2212                 return r;
2213
2214         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2215
2216         gfx_v9_0_scratch_init(adev);
2217
2218         r = gfx_v9_0_init_microcode(adev);
2219         if (r) {
2220                 DRM_ERROR("Failed to load gfx firmware!\n");
2221                 return r;
2222         }
2223
2224         r = adev->gfx.rlc.funcs->init(adev);
2225         if (r) {
2226                 DRM_ERROR("Failed to init rlc BOs!\n");
2227                 return r;
2228         }
2229
2230         r = gfx_v9_0_mec_init(adev);
2231         if (r) {
2232                 DRM_ERROR("Failed to init MEC BOs!\n");
2233                 return r;
2234         }
2235
2236         /* set up the gfx ring */
2237         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2238                 ring = &adev->gfx.gfx_ring[i];
2239                 ring->ring_obj = NULL;
2240                 if (!i)
2241                         sprintf(ring->name, "gfx");
2242                 else
2243                         sprintf(ring->name, "gfx_%d", i);
2244                 ring->use_doorbell = true;
2245                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2246                 r = amdgpu_ring_init(adev, ring, 1024,
2247                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2248                 if (r)
2249                         return r;
2250         }
2251
2252         /* set up the compute queues - allocate horizontally across pipes */
2253         ring_id = 0;
2254         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2255                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2256                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2257                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2258                                         continue;
2259
2260                                 r = gfx_v9_0_compute_ring_init(adev,
2261                                                                ring_id,
2262                                                                i, k, j);
2263                                 if (r)
2264                                         return r;
2265
2266                                 ring_id++;
2267                         }
2268                 }
2269         }
2270
2271         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2272         if (r) {
2273                 DRM_ERROR("Failed to init KIQ BOs!\n");
2274                 return r;
2275         }
2276
2277         kiq = &adev->gfx.kiq;
2278         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2279         if (r)
2280                 return r;
2281
2282         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2283         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2284         if (r)
2285                 return r;
2286
2287         adev->gfx.ce_ram_size = 0x8000;
2288
2289         r = gfx_v9_0_gpu_early_init(adev);
2290         if (r)
2291                 return r;
2292
2293         return 0;
2294 }
2295
2296
2297 static int gfx_v9_0_sw_fini(void *handle)
2298 {
2299         int i;
2300         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2301
2302         amdgpu_gfx_ras_fini(adev);
2303
2304         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2305                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2306         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2307                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2308
2309         amdgpu_gfx_mqd_sw_fini(adev);
2310         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2311         amdgpu_gfx_kiq_fini(adev);
2312
2313         gfx_v9_0_mec_fini(adev);
2314         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2315         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2316                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2317                                 &adev->gfx.rlc.cp_table_gpu_addr,
2318                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2319         }
2320         gfx_v9_0_free_microcode(adev);
2321
2322         return 0;
2323 }
2324
2325
2326 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2327 {
2328         /* TODO */
2329 }
2330
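/*
 * Program GRBM_GFX_INDEX to steer subsequent register accesses to a specific
 * shader engine / shader array / instance; passing 0xffffffff for any
 * argument selects broadcast writes to all units of that type instead.
 */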
2331 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2332 {
2333         u32 data;
2334
2335         if (instance == 0xffffffff)
2336                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2337         else
2338                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2339
2340         if (se_num == 0xffffffff)
2341                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2342         else
2343                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2344
2345         if (sh_num == 0xffffffff)
2346                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2347         else
2348                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2349
2350         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2351 }
2352
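/*
 * Return a bitmap of the render backends in the currently selected SE/SH
 * that are not disabled via CC_RB_BACKEND_DISABLE or
 * GC_USER_RB_BACKEND_DISABLE.
 */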
2353 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2354 {
2355         u32 data, mask;
2356
2357         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2358         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2359
2360         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2361         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2362
2363         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2364                                          adev->gfx.config.max_sh_per_se);
2365
2366         return (~data) & mask;
2367 }
2368
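/*
 * Walk every shader engine / shader array, collect the active render backend
 * bitmap of each, and cache the combined enable mask and RB count in
 * adev->gfx.config.
 */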
2369 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2370 {
2371         int i, j;
2372         u32 data;
2373         u32 active_rbs = 0;
2374         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2375                                         adev->gfx.config.max_sh_per_se;
2376
2377         mutex_lock(&adev->grbm_idx_mutex);
2378         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2379                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2380                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2381                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2382                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2383                                                rb_bitmap_width_per_sh);
2384                 }
2385         }
2386         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2387         mutex_unlock(&adev->grbm_idx_mutex);
2388
2389         adev->gfx.config.backend_enable_mask = active_rbs;
2390         adev->gfx.config.num_rbs = hweight32(active_rbs);
2391 }
2392
2393 #define DEFAULT_SH_MEM_BASES    (0x6000)
2394 #define FIRST_COMPUTE_VMID      (8)
2395 #define LAST_COMPUTE_VMID       (16)
2396 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2397 {
2398         int i;
2399         uint32_t sh_mem_config;
2400         uint32_t sh_mem_bases;
2401
2402         /*
2403          * Configure apertures:
2404          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2405          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2406          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2407          */
2408         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2409
2410         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2411                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2412                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2413
2414         mutex_lock(&adev->srbm_mutex);
2415         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2416                 soc15_grbm_select(adev, 0, 0, 0, i);
2417                 /* CP and shaders */
2418                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2419                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2420         }
2421         soc15_grbm_select(adev, 0, 0, 0, 0);
2422         mutex_unlock(&adev->srbm_mutex);
2423
2424         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2425            access. These should be enabled by FW for target VMIDs. */
2426         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2427                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2428                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2429                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2430                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2431         }
2432 }
2433
2434 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2435 {
2436         int vmid;
2437
2438         /*
2439          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2440          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2441          * the driver can enable them for graphics. VMID0 should maintain
2442          * access so that HWS firmware can save/restore entries.
2443          */
2444         for (vmid = 1; vmid < 16; vmid++) {
2445                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2446                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2447                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2448                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2449         }
2450 }
2451
2452 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2453 {
2454         uint32_t tmp;
2455
2456         switch (adev->asic_type) {
2457         case CHIP_ARCTURUS:
2458                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2459                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2460                                         DISABLE_BARRIER_WAITCNT, 1);
2461                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2462                 break;
2463         default:
2464                 break;
2465         }
2466 }
2467
2468 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2469 {
2470         u32 tmp;
2471         int i;
2472
2473         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2474
2475         gfx_v9_0_tiling_mode_table_init(adev);
2476
2477         gfx_v9_0_setup_rb(adev);
2478         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2479         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2480
2481         /* XXX SH_MEM regs */
2482         /* where to put LDS, scratch, GPUVM in FSA64 space */
2483         mutex_lock(&adev->srbm_mutex);
2484         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2485                 soc15_grbm_select(adev, 0, 0, 0, i);
2486                 /* CP and shaders */
2487                 if (i == 0) {
2488                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2489                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2490                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2491                                             !!amdgpu_noretry);
2492                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2493                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2494                 } else {
2495                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2496                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2497                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2498                                             !!amdgpu_noretry);
2499                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2500                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2501                                 (adev->gmc.private_aperture_start >> 48));
2502                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2503                                 (adev->gmc.shared_aperture_start >> 48));
2504                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2505                 }
2506         }
2507         soc15_grbm_select(adev, 0, 0, 0, 0);
2508
2509         mutex_unlock(&adev->srbm_mutex);
2510
2511         gfx_v9_0_init_compute_vmid(adev);
2512         gfx_v9_0_init_gds_vmid(adev);
2513         gfx_v9_0_init_sq_config(adev);
2514 }
2515
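/*
 * Wait (up to adev->usec_timeout) for the RLC serdes CU masters of every
 * SE/SH to go idle, then for the non-CU masters (SE/GC/TC0/TC1) to go idle.
 */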
2516 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2517 {
2518         u32 i, j, k;
2519         u32 mask;
2520
2521         mutex_lock(&adev->grbm_idx_mutex);
2522         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2523                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2524                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2525                         for (k = 0; k < adev->usec_timeout; k++) {
2526                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2527                                         break;
2528                                 udelay(1);
2529                         }
2530                         if (k == adev->usec_timeout) {
2531                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2532                                                       0xffffffff, 0xffffffff);
2533                                 mutex_unlock(&adev->grbm_idx_mutex);
2534                                 DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
2535                                          i, j);
2536                                 return;
2537                         }
2538                 }
2539         }
2540         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2541         mutex_unlock(&adev->grbm_idx_mutex);
2542
2543         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2544                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2545                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2546                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2547         for (k = 0; k < adev->usec_timeout; k++) {
2548                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2549                         break;
2550                 udelay(1);
2551         }
2552 }
2553
2554 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2555                                                bool enable)
2556 {
2557         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2558
2559         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2560         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2561         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2562         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2563
2564         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2565 }
2566
2567 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2568 {
2569         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2570         /* csib */
2571         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2572                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2573         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2574                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2575         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2576                         adev->gfx.rlc.clear_state_size);
2577 }
2578
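/*
 * Scan the indirect portion of the RLC register list format: record the
 * starting offset of each indirect block in indirect_start_offsets[] and
 * collect the distinct indirect register addresses into
 * unique_indirect_regs[] (each block is terminated by 0xFFFFFFFF).
 */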
2579 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2580                                 int indirect_offset,
2581                                 int list_size,
2582                                 int *unique_indirect_regs,
2583                                 int unique_indirect_reg_count,
2584                                 int *indirect_start_offsets,
2585                                 int *indirect_start_offsets_count,
2586                                 int max_start_offsets_count)
2587 {
2588         int idx;
2589
2590         for (; indirect_offset < list_size; indirect_offset++) {
2591                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2592                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2593                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2594
2595                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2596                         indirect_offset += 2;
2597
2598                         /* look for the matching index */
2599                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2600                                 if (unique_indirect_regs[idx] ==
2601                                         register_list_format[indirect_offset] ||
2602                                         !unique_indirect_regs[idx])
2603                                         break;
2604                         }
2605
2606                         BUG_ON(idx >= unique_indirect_reg_count);
2607
2608                         if (!unique_indirect_regs[idx])
2609                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2610
2611                         indirect_offset++;
2612                 }
2613         }
2614 }
2615
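/*
 * Program the RLC save/restore machine: copy the direct register restore
 * table into SRM ARAM, stream the direct and indirect register list into
 * GPM scratch (substituting each indirect register address with its index
 * in the unique register table), then write the list size, the per-block
 * start offsets and the unique indirect register address/data index
 * control registers.
 */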
2616 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2617 {
2618         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2619         int unique_indirect_reg_count = 0;
2620
2621         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2622         int indirect_start_offsets_count = 0;
2623
2624         int list_size = 0;
2625         int i = 0, j = 0;
2626         u32 tmp = 0;
2627
2628         u32 *register_list_format =
2629                 kmemdup(adev->gfx.rlc.register_list_format,
2630                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2631         if (!register_list_format)
2632                 return -ENOMEM;
2633
2634         /* setup unique_indirect_regs array and indirect_start_offsets array */
2635         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2636         gfx_v9_1_parse_ind_reg_list(register_list_format,
2637                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2638                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2639                                     unique_indirect_regs,
2640                                     unique_indirect_reg_count,
2641                                     indirect_start_offsets,
2642                                     &indirect_start_offsets_count,
2643                                     ARRAY_SIZE(indirect_start_offsets));
2644
2645         /* enable auto inc in case it is disabled */
2646         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2647         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2648         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2649
2650         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2651         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2652                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2653         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2654                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2655                         adev->gfx.rlc.register_restore[i]);
2656
2657         /* load indirect register */
2658         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2659                 adev->gfx.rlc.reg_list_format_start);
2660
2661         /* direct register portion */
2662         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2663                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2664                         register_list_format[i]);
2665
2666         /* indirect register portion */
2667         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2668                 if (register_list_format[i] == 0xFFFFFFFF) {
2669                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2670                         continue;
2671                 }
2672
2673                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2674                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2675
2676                 for (j = 0; j < unique_indirect_reg_count; j++) {
2677                         if (register_list_format[i] == unique_indirect_regs[j]) {
2678                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2679                                 break;
2680                         }
2681                 }
2682
2683                 BUG_ON(j >= unique_indirect_reg_count);
2684
2685                 i++;
2686         }
2687
2688         /* set save/restore list size */
2689         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2690         list_size = list_size >> 1;
2691         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2692                 adev->gfx.rlc.reg_restore_list_size);
2693         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2694
2695         /* write the starting offsets to RLC scratch ram */
2696         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2697                 adev->gfx.rlc.starting_offsets_start);
2698         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2699                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2700                        indirect_start_offsets[i]);
2701
2702         /* load unique indirect regs */
2703         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2704                 if (unique_indirect_regs[i] != 0) {
2705                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2706                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2707                                unique_indirect_regs[i] & 0x3FFFF);
2708
2709                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2710                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2711                                unique_indirect_regs[i] >> 20);
2712                 }
2713         }
2714
2715         kfree(register_list_format);
2716         return 0;
2717 }
2718
2719 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2720 {
2721         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2722 }
2723
2724 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2725                                              bool enable)
2726 {
2727         uint32_t data = 0;
2728         uint32_t default_data = 0;
2729
2730         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2731         if (enable) {
2732                 /* enable GFXIP control over CGPG */
2733                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2734                 if (default_data != data)
2735                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2736
2737                 /* update status */
2738                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2739                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2740                 if (default_data != data)
2741                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2742         } else {
2743                 /* restore GFXIP control over CGPG */
2744                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2745                 if (default_data != data)
2746                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2747         }
2748 }
2749
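/*
 * When GFX PG/SMG/DMG is supported, program the CP idle poll count, the RLC
 * power-gating delay registers and the GRBM register-save idle threshold,
 * then hand control of CGPG over to the GFX IP.
 */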
2750 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2751 {
2752         uint32_t data = 0;
2753
2754         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2755                               AMD_PG_SUPPORT_GFX_SMG |
2756                               AMD_PG_SUPPORT_GFX_DMG)) {
2757                 /* init IDLE_POLL_COUNT = 60 */
2758                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2759                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2760                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2761                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2762
2763                 /* init RLC PG Delay */
2764                 data = 0;
2765                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2766                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2767                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2768                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2769                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2770
2771                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2772                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2773                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2774                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2775
2776                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2777                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2778                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2779                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2780
2781                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2782                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2783
2784                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2785                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2786                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2787
2788                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2789         }
2790 }
2791
2792 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2793                                                 bool enable)
2794 {
2795         uint32_t data = 0;
2796         uint32_t default_data = 0;
2797
2798         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2799         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2800                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2801                              enable ? 1 : 0);
2802         if (default_data != data)
2803                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2804 }
2805
2806 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2807                                                 bool enable)
2808 {
2809         uint32_t data = 0;
2810         uint32_t default_data = 0;
2811
2812         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2813         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2814                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2815                              enable ? 1 : 0);
2816         if (default_data != data)
2817                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2818 }
2819
2820 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2821                                         bool enable)
2822 {
2823         uint32_t data = 0;
2824         uint32_t default_data = 0;
2825
2826         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2827         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2828                              CP_PG_DISABLE,
2829                              enable ? 0 : 1);
2830         if (default_data != data)
2831                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2832 }
2833
2834 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2835                                                 bool enable)
2836 {
2837         uint32_t data, default_data;
2838
2839         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2840         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2841                              GFX_POWER_GATING_ENABLE,
2842                              enable ? 1 : 0);
2843         if (default_data != data)
2844                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2845 }
2846
2847 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2848                                                 bool enable)
2849 {
2850         uint32_t data, default_data;
2851
2852         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2853         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2854                              GFX_PIPELINE_PG_ENABLE,
2855                              enable ? 1 : 0);
2856         if (default_data != data)
2857                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2858
2859         if (!enable)
2860                 /* read any GFX register to wake up GFX */
2861                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2862 }
2863
2864 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2865                                                        bool enable)
2866 {
2867         uint32_t data, default_data;
2868
2869         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2870         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2871                              STATIC_PER_CU_PG_ENABLE,
2872                              enable ? 1 : 0);
2873         if (default_data != data)
2874                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2875 }
2876
2877 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2878                                                 bool enable)
2879 {
2880         uint32_t data, default_data;
2881
2882         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2883         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2884                              DYN_PER_CU_PG_ENABLE,
2885                              enable ? 1 : 0);
2886         if (default_data != data)
2887                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2888 }
2889
2890 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2891 {
2892         gfx_v9_0_init_csb(adev);
2893
2894         /*
2895          * The RLC save/restore list is supported from RLC v2_1 onwards
2896          * and is required by the gfxoff feature.
2897          */
2898         if (adev->gfx.rlc.is_rlc_v2_1) {
2899                 if (adev->asic_type == CHIP_VEGA12 ||
2900                     (adev->asic_type == CHIP_RAVEN &&
2901                      adev->rev_id >= 8))
2902                         gfx_v9_1_init_rlc_save_restore_list(adev);
2903                 gfx_v9_0_enable_save_restore_machine(adev);
2904         }
2905
2906         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2907                               AMD_PG_SUPPORT_GFX_SMG |
2908                               AMD_PG_SUPPORT_GFX_DMG |
2909                               AMD_PG_SUPPORT_CP |
2910                               AMD_PG_SUPPORT_GDS |
2911                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2912                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2913                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2914                 gfx_v9_0_init_gfx_power_gating(adev);
2915         }
2916 }
2917
2918 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2919 {
2920         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2921         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2922         gfx_v9_0_wait_for_rlc_serdes(adev);
2923 }
2924
2925 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2926 {
2927         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2928         udelay(50);
2929         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2930         udelay(50);
2931 }
2932
2933 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2934 {
2935 #ifdef AMDGPU_RLC_DEBUG_RETRY
2936         u32 rlc_ucode_ver;
2937 #endif
2938
2939         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2940         udelay(50);
2941
2942         /* on APUs the CP interrupt is enabled only after the CP is initialized */
2943         if (!(adev->flags & AMD_IS_APU)) {
2944                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2945                 udelay(50);
2946         }
2947
2948 #ifdef AMDGPU_RLC_DEBUG_RETRY
2949         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2950         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2951         if (rlc_ucode_ver == 0x108) {
2952                 DRM_INFO("Using RLC debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2953                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2954                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2955                  * default is 0x9C4 to create a 100us interval */
2956                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2957                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2958                  * to disable the page fault retry interrupts, default is
2959                  * 0x100 (256) */
2960                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2961         }
2962 #endif
2963 }
2964
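/*
 * Legacy (non-PSP) RLC firmware load: stream the RLCG ucode through the
 * RLC_GPM_UCODE_ADDR/DATA window starting at
 * RLCG_UCODE_LOADING_START_ADDRESS, then write back the firmware version.
 */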
2965 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2966 {
2967         const struct rlc_firmware_header_v2_0 *hdr;
2968         const __le32 *fw_data;
2969         unsigned i, fw_size;
2970
2971         if (!adev->gfx.rlc_fw)
2972                 return -EINVAL;
2973
2974         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2975         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2976
2977         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2978                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2979         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2980
2981         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2982                         RLCG_UCODE_LOADING_START_ADDRESS);
2983         for (i = 0; i < fw_size; i++)
2984                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2985         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2986
2987         return 0;
2988 }
2989
2990 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2991 {
2992         int r;
2993
2994         if (amdgpu_sriov_vf(adev)) {
2995                 gfx_v9_0_init_csb(adev);
2996                 return 0;
2997         }
2998
2999         adev->gfx.rlc.funcs->stop(adev);
3000
3001         /* disable CG */
3002         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3003
3004         gfx_v9_0_init_pg(adev);
3005
3006         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3007                 /* legacy rlc firmware loading */
3008                 r = gfx_v9_0_rlc_load_microcode(adev);
3009                 if (r)
3010                         return r;
3011         }
3012
3013         switch (adev->asic_type) {
3014         case CHIP_RAVEN:
3015                 if (amdgpu_lbpw == 0)
3016                         gfx_v9_0_enable_lbpw(adev, false);
3017                 else
3018                         gfx_v9_0_enable_lbpw(adev, true);
3019                 break;
3020         case CHIP_VEGA20:
3021                 if (amdgpu_lbpw > 0)
3022                         gfx_v9_0_enable_lbpw(adev, true);
3023                 else
3024                         gfx_v9_0_enable_lbpw(adev, false);
3025                 break;
3026         default:
3027                 break;
3028         }
3029
3030         adev->gfx.rlc.funcs->start(adev);
3031
3032         return 0;
3033 }
3034
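/*
 * Halt or un-halt the ME, PFP and CE micro engines; when halting, also mark
 * the gfx rings as not ready for the scheduler.
 */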
3035 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3036 {
3037         int i;
3038         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3039
3040         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3041         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3042         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3043         if (!enable) {
3044                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3045                         adev->gfx.gfx_ring[i].sched.ready = false;
3046         }
3047         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3048         udelay(50);
3049 }
3050
3051 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3052 {
3053         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3054         const struct gfx_firmware_header_v1_0 *ce_hdr;
3055         const struct gfx_firmware_header_v1_0 *me_hdr;
3056         const __le32 *fw_data;
3057         unsigned i, fw_size;
3058
3059         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3060                 return -EINVAL;
3061
3062         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3063                 adev->gfx.pfp_fw->data;
3064         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3065                 adev->gfx.ce_fw->data;
3066         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3067                 adev->gfx.me_fw->data;
3068
3069         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3070         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3071         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3072
3073         gfx_v9_0_cp_gfx_enable(adev, false);
3074
3075         /* PFP */
3076         fw_data = (const __le32 *)
3077                 (adev->gfx.pfp_fw->data +
3078                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3079         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3080         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3081         for (i = 0; i < fw_size; i++)
3082                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3083         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3084
3085         /* CE */
3086         fw_data = (const __le32 *)
3087                 (adev->gfx.ce_fw->data +
3088                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3089         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3090         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3091         for (i = 0; i < fw_size; i++)
3092                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3093         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3094
3095         /* ME */
3096         fw_data = (const __le32 *)
3097                 (adev->gfx.me_fw->data +
3098                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3099         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3100         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3101         for (i = 0; i < fw_size; i++)
3102                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3103         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3104
3105         return 0;
3106 }
3107
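/*
 * Bring up the gfx CP: program the hardware context count and device id,
 * un-halt the CP, then emit the clear-state preamble, the context register
 * defaults from the gfx9 clear-state data and the CE partition bases on
 * gfx ring 0.
 */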
3108 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3109 {
3110         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3111         const struct cs_section_def *sect = NULL;
3112         const struct cs_extent_def *ext = NULL;
3113         int r, i, tmp;
3114
3115         /* init the CP */
3116         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3117         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3118
3119         gfx_v9_0_cp_gfx_enable(adev, true);
3120
3121         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3122         if (r) {
3123                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3124                 return r;
3125         }
3126
3127         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3128         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3129
3130         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3131         amdgpu_ring_write(ring, 0x80000000);
3132         amdgpu_ring_write(ring, 0x80000000);
3133
3134         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3135                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3136                         if (sect->id == SECT_CONTEXT) {
3137                                 amdgpu_ring_write(ring,
3138                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3139                                                ext->reg_count));
3140                                 amdgpu_ring_write(ring,
3141                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3142                                 for (i = 0; i < ext->reg_count; i++)
3143                                         amdgpu_ring_write(ring, ext->extent[i]);
3144                         }
3145                 }
3146         }
3147
3148         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3149         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3150
3151         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3152         amdgpu_ring_write(ring, 0);
3153
3154         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3155         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3156         amdgpu_ring_write(ring, 0x8000);
3157         amdgpu_ring_write(ring, 0x8000);
3158
3159         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3160         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3161                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3162         amdgpu_ring_write(ring, tmp);
3163         amdgpu_ring_write(ring, 0);
3164
3165         amdgpu_ring_commit(ring);
3166
3167         return 0;
3168 }
3169
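/*
 * Program gfx ring 0 into the CP RB0 registers: ring buffer size, rptr/wptr
 * write-back addresses, ring base and the doorbell control/range, then
 * start the ring via gfx_v9_0_cp_gfx_start().
 */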
3170 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3171 {
3172         struct amdgpu_ring *ring;
3173         u32 tmp;
3174         u32 rb_bufsz;
3175         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3176
3177         /* Set the write pointer delay */
3178         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3179
3180         /* set the RB to use vmid 0 */
3181         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3182
3183         /* Set ring buffer size */
3184         ring = &adev->gfx.gfx_ring[0];
3185         rb_bufsz = order_base_2(ring->ring_size / 8);
3186         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3187         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3188 #ifdef __BIG_ENDIAN
3189         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3190 #endif
3191         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3192
3193         /* Initialize the ring buffer's write pointers */
3194         ring->wptr = 0;
3195         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3196         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3197
3198         /* set the wb address whether it's enabled or not */
3199         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3200         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3201         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3202
3203         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3204         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3205         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3206
3207         mdelay(1);
3208         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3209
3210         rb_addr = ring->gpu_addr >> 8;
3211         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3212         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3213
3214         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3215         if (ring->use_doorbell) {
3216                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3217                                     DOORBELL_OFFSET, ring->doorbell_index);
3218                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3219                                     DOORBELL_EN, 1);
3220         } else {
3221                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3222         }
3223         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3224
3225         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3226                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3227         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3228
3229         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3230                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3231
3232
3233         /* start the ring */
3234         gfx_v9_0_cp_gfx_start(adev);
3235         ring->sched.ready = true;
3236
3237         return 0;
3238 }
3239
3240 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3241 {
3242         int i;
3243
3244         if (enable) {
3245                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3246         } else {
3247                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3248                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3249                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3250                         adev->gfx.compute_ring[i].sched.ready = false;
3251                 adev->gfx.kiq.ring.sched.ready = false;
3252         }
3253         udelay(50);
3254 }
3255
3256 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3257 {
3258         const struct gfx_firmware_header_v1_0 *mec_hdr;
3259         const __le32 *fw_data;
3260         unsigned i;
3261         u32 tmp;
3262
3263         if (!adev->gfx.mec_fw)
3264                 return -EINVAL;
3265
3266         gfx_v9_0_cp_compute_enable(adev, false);
3267
3268         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3269         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3270
3271         fw_data = (const __le32 *)
3272                 (adev->gfx.mec_fw->data +
3273                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3274         tmp = 0;
3275         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3276         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3277         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3278
3279         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3280                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3281         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3282                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3283
3284         /* MEC1 */
3285         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3286                          mec_hdr->jt_offset);
3287         for (i = 0; i < mec_hdr->jt_size; i++)
3288                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3289                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3290
3291         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3292                         adev->gfx.mec_fw_version);
3293         /* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3294
3295         return 0;
3296 }
3297
3298 /* KIQ functions */
3299 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3300 {
3301         uint32_t tmp;
3302         struct amdgpu_device *adev = ring->adev;
3303
3304         /* tell RLC which queue is the KIQ */
3305         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3306         tmp &= 0xffffff00;
3307         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3308         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3309         tmp |= 0x80;
3310         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3311 }
3312
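/*
 * Fill the v9 MQD for this ring in host memory: EOP buffer, MQD and HQD base
 * addresses, queue size, rptr report and wptr poll addresses and the
 * doorbell setup. The values are later committed to the HQD registers (see
 * gfx_v9_0_kiq_init_register() for the KIQ path).
 */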
3313 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3314 {
3315         struct amdgpu_device *adev = ring->adev;
3316         struct v9_mqd *mqd = ring->mqd_ptr;
3317         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3318         uint32_t tmp;
3319
3320         mqd->header = 0xC0310800;
3321         mqd->compute_pipelinestat_enable = 0x00000001;
3322         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3323         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3324         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3325         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3326         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3327         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3328         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3329         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3330         mqd->compute_misc_reserved = 0x00000003;
3331
3332         mqd->dynamic_cu_mask_addr_lo =
3333                 lower_32_bits(ring->mqd_gpu_addr
3334                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3335         mqd->dynamic_cu_mask_addr_hi =
3336                 upper_32_bits(ring->mqd_gpu_addr
3337                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3338
3339         eop_base_addr = ring->eop_gpu_addr >> 8;
3340         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3341         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3342
3343         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3344         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3345         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3346                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3347
3348         mqd->cp_hqd_eop_control = tmp;
3349
3350         /* enable doorbell? */
3351         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3352
3353         if (ring->use_doorbell) {
3354                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3355                                     DOORBELL_OFFSET, ring->doorbell_index);
3356                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3357                                     DOORBELL_EN, 1);
3358                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3359                                     DOORBELL_SOURCE, 0);
3360                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3361                                     DOORBELL_HIT, 0);
3362         } else {
3363                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3364                                          DOORBELL_EN, 0);
3365         }
3366
3367         mqd->cp_hqd_pq_doorbell_control = tmp;
3368
3369         /* disable the queue if it's active */
3370         ring->wptr = 0;
3371         mqd->cp_hqd_dequeue_request = 0;
3372         mqd->cp_hqd_pq_rptr = 0;
3373         mqd->cp_hqd_pq_wptr_lo = 0;
3374         mqd->cp_hqd_pq_wptr_hi = 0;
3375
3376         /* set the pointer to the MQD */
3377         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3378         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3379
3380         /* set MQD vmid to 0 */
3381         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3382         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3383         mqd->cp_mqd_control = tmp;
3384
3385         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3386         hqd_gpu_addr = ring->gpu_addr >> 8;
3387         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3388         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3389
3390         /* set up the HQD, this is similar to CP_RB0_CNTL */
3391         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3392         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3393                             (order_base_2(ring->ring_size / 4) - 1));
3394         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3395                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3396 #ifdef __BIG_ENDIAN
3397         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3398 #endif
3399         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3400         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3401         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3402         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3403         mqd->cp_hqd_pq_control = tmp;
3404
3405         /* set the wb address whether it's enabled or not */
3406         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3407         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3408         mqd->cp_hqd_pq_rptr_report_addr_hi =
3409                 upper_32_bits(wb_gpu_addr) & 0xffff;
3410
3411         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3412         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3413         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3414         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3415
3416         tmp = 0;
3417         /* enable the doorbell if requested */
3418         if (ring->use_doorbell) {
3419                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3420                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3421                                 DOORBELL_OFFSET, ring->doorbell_index);
3422
3423                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3424                                          DOORBELL_EN, 1);
3425                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3426                                          DOORBELL_SOURCE, 0);
3427                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3428                                          DOORBELL_HIT, 0);
3429         }
3430
3431         mqd->cp_hqd_pq_doorbell_control = tmp;
3432
3433         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3434         ring->wptr = 0;
3435         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3436
3437         /* set the vmid for the queue */
3438         mqd->cp_hqd_vmid = 0;
3439
3440         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3441         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3442         mqd->cp_hqd_persistent_state = tmp;
3443
3444         /* set MIN_IB_AVAIL_SIZE */
3445         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3446         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3447         mqd->cp_hqd_ib_control = tmp;
3448
3449         /* the map_queues packet doesn't need to activate the queue,
3450          * so only the KIQ needs to set this field.
3451          */
3452         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3453                 mqd->cp_hqd_active = 1;
3454
3455         return 0;
3456 }
3457
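/*
 * Write the prepared MQD directly into the HQD registers (the driver
 * programs the KIQ itself rather than through a KIQ map_queues packet):
 * disable wptr polling, quiesce the queue if it is active, program the MQD
 * and HQD addresses and the doorbell, then activate the queue.
 */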
3458 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3459 {
3460         struct amdgpu_device *adev = ring->adev;
3461         struct v9_mqd *mqd = ring->mqd_ptr;
3462         int j;
3463
3464         /* disable wptr polling */
3465         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3466
3467         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3468                mqd->cp_hqd_eop_base_addr_lo);
3469         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3470                mqd->cp_hqd_eop_base_addr_hi);
3471
3472         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3473         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3474                mqd->cp_hqd_eop_control);
3475
3476         /* enable doorbell? */
3477         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3478                mqd->cp_hqd_pq_doorbell_control);
3479
3480         /* disable the queue if it's active */
3481         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3482                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3483                 for (j = 0; j < adev->usec_timeout; j++) {
3484                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3485                                 break;
3486                         udelay(1);
3487                 }
3488                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3489                        mqd->cp_hqd_dequeue_request);
3490                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3491                        mqd->cp_hqd_pq_rptr);
3492                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3493                        mqd->cp_hqd_pq_wptr_lo);
3494                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3495                        mqd->cp_hqd_pq_wptr_hi);
3496         }
3497
3498         /* set the pointer to the MQD */
3499         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3500                mqd->cp_mqd_base_addr_lo);
3501         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3502                mqd->cp_mqd_base_addr_hi);
3503
3504         /* set MQD vmid to 0 */
3505         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3506                mqd->cp_mqd_control);
3507
3508         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3509         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3510                mqd->cp_hqd_pq_base_lo);
3511         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3512                mqd->cp_hqd_pq_base_hi);
3513
3514         /* set up the HQD, this is similar to CP_RB0_CNTL */
3515         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3516                mqd->cp_hqd_pq_control);
3517
3518         /* set the wb address whether it's enabled or not */
3519         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3520                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3521         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3522                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3523
3524         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3525         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3526                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3527         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3528                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3529
3530         /* set up the MEC doorbell range if the ring uses a doorbell */
3531         if (ring->use_doorbell) {
3532                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3533                                         (adev->doorbell_index.kiq * 2) << 2);
3534                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3535                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3536         }
3537
3538         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3539                mqd->cp_hqd_pq_doorbell_control);
3540
3541         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3542         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3543                mqd->cp_hqd_pq_wptr_lo);
3544         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3545                mqd->cp_hqd_pq_wptr_hi);
3546
3547         /* set the vmid for the queue */
3548         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3549
3550         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3551                mqd->cp_hqd_persistent_state);
3552
3553         /* activate the queue */
3554         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3555                mqd->cp_hqd_active);
3556
3557         if (ring->use_doorbell)
3558                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3559
3560         return 0;
3561 }
3562
3563 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3564 {
3565         struct amdgpu_device *adev = ring->adev;
3566         int j;
3567
3568         /* disable the queue if it's active */
3569         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3570
3571                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3572
3573                 for (j = 0; j < adev->usec_timeout; j++) {
3574                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3575                                 break;
3576                         udelay(1);
3577                 }
3578
3579                 if (j == adev->usec_timeout) {
3580                         DRM_DEBUG("KIQ dequeue request failed.\n");
3581
3582                         /* Manual disable if dequeue request times out */
3583                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3584                 }
3585
3586                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3587                       0);
3588         }
3589
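        /* clear the remaining HQD state so the queue is left fully deactivated */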
3590         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3591         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3592         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3593         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3594         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3595         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3596         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3597         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3598
3599         return 0;
3600 }
3601
3602 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3603 {
3604         struct amdgpu_device *adev = ring->adev;
3605         struct v9_mqd *mqd = ring->mqd_ptr;
3606         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3607
3608         gfx_v9_0_kiq_setting(ring);
3609
3610         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3611                 /* reset MQD to a clean status */
3612                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3613                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3614
3615                 /* reset ring buffer */
3616                 ring->wptr = 0;
3617                 amdgpu_ring_clear_ring(ring);
3618
3619                 mutex_lock(&adev->srbm_mutex);
3620                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3621                 gfx_v9_0_kiq_init_register(ring);
3622                 soc15_grbm_select(adev, 0, 0, 0, 0);
3623                 mutex_unlock(&adev->srbm_mutex);
3624         } else {
3625                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3626                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3627                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3628                 mutex_lock(&adev->srbm_mutex);
3629                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3630                 gfx_v9_0_mqd_init(ring);
3631                 gfx_v9_0_kiq_init_register(ring);
3632                 soc15_grbm_select(adev, 0, 0, 0, 0);
3633                 mutex_unlock(&adev->srbm_mutex);
3634
3635                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3636                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3637         }
3638
3639         return 0;
3640 }
3641
3642 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3643 {
3644         struct amdgpu_device *adev = ring->adev;
3645         struct v9_mqd *mqd = ring->mqd_ptr;
3646         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3647
3648         if (!adev->in_gpu_reset && !adev->in_suspend) {
3649                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3650                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3651                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3652                 mutex_lock(&adev->srbm_mutex);
3653                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3654                 gfx_v9_0_mqd_init(ring);
3655                 soc15_grbm_select(adev, 0, 0, 0, 0);
3656                 mutex_unlock(&adev->srbm_mutex);
3657
3658                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3659                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3660         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3661                 /* reset MQD to a clean status */
3662                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3663                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3664
3665                 /* reset ring buffer */
3666                 ring->wptr = 0;
3667                 amdgpu_ring_clear_ring(ring);
3668         } else {
3669                 amdgpu_ring_clear_ring(ring);
3670         }
3671
3672         return 0;
3673 }
3674
3675 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3676 {
3677         struct amdgpu_ring *ring;
3678         int r;
3679
3680         ring = &adev->gfx.kiq.ring;
3681
3682         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3683         if (unlikely(r != 0))
3684                 return r;
3685
3686         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3687         if (unlikely(r != 0))
3688                 return r;
3689
3690         gfx_v9_0_kiq_init_queue(ring);
3691         amdgpu_bo_kunmap(ring->mqd_obj);
3692         ring->mqd_ptr = NULL;
3693         amdgpu_bo_unreserve(ring->mqd_obj);
3694         ring->sched.ready = true;
3695         return 0;
3696 }
3697
3698 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3699 {
3700         struct amdgpu_ring *ring = NULL;
3701         int r = 0, i;
3702
3703         gfx_v9_0_cp_compute_enable(adev, true);
3704
3705         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3706                 ring = &adev->gfx.compute_ring[i];
3707
3708                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3709                 if (unlikely(r != 0))
3710                         goto done;
3711                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3712                 if (!r) {
3713                         r = gfx_v9_0_kcq_init_queue(ring);
3714                         amdgpu_bo_kunmap(ring->mqd_obj);
3715                         ring->mqd_ptr = NULL;
3716                 }
3717                 amdgpu_bo_unreserve(ring->mqd_obj);
3718                 if (r)
3719                         goto done;
3720         }
3721
3722         r = amdgpu_gfx_enable_kcq(adev);
3723 done:
3724         return r;
3725 }
3726
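/* Bring up the command processor: load microcode when not using PSP, resume the
 * KIQ, the GFX ring (except on Arcturus) and the compute queues, then ring-test
 * everything before re-enabling the GUI idle interrupt.
 */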
3727 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3728 {
3729         int r, i;
3730         struct amdgpu_ring *ring;
3731
3732         if (!(adev->flags & AMD_IS_APU))
3733                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3734
3735         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3736                 if (adev->asic_type != CHIP_ARCTURUS) {
3737                         /* legacy firmware loading */
3738                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3739                         if (r)
3740                                 return r;
3741                 }
3742
3743                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3744                 if (r)
3745                         return r;
3746         }
3747
3748         r = gfx_v9_0_kiq_resume(adev);
3749         if (r)
3750                 return r;
3751
3752         if (adev->asic_type != CHIP_ARCTURUS) {
3753                 r = gfx_v9_0_cp_gfx_resume(adev);
3754                 if (r)
3755                         return r;
3756         }
3757
3758         r = gfx_v9_0_kcq_resume(adev);
3759         if (r)
3760                 return r;
3761
3762         if (adev->asic_type != CHIP_ARCTURUS) {
3763                 ring = &adev->gfx.gfx_ring[0];
3764                 r = amdgpu_ring_test_helper(ring);
3765                 if (r)
3766                         return r;
3767         }
3768
3769         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3770                 ring = &adev->gfx.compute_ring[i];
3771                 amdgpu_ring_test_helper(ring);
3772         }
3773
3774         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3775
3776         return 0;
3777 }
3778
3779 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3780 {
3781         u32 tmp;
3782
3783         if (adev->asic_type != CHIP_ARCTURUS)
3784                 return;
3785
3786         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3787         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3788                                 adev->df.hash_status.hash_64k);
3789         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3790                                 adev->df.hash_status.hash_2m);
3791         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3792                                 adev->df.hash_status.hash_1g);
3793         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3794 }
3795
3796 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3797 {
3798         if (adev->asic_type != CHIP_ARCTURUS)
3799                 gfx_v9_0_cp_gfx_enable(adev, enable);
3800         gfx_v9_0_cp_compute_enable(adev, enable);
3801 }
3802
3803 static int gfx_v9_0_hw_init(void *handle)
3804 {
3805         int r;
3806         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3807
3808         if (!amdgpu_sriov_vf(adev))
3809                 gfx_v9_0_init_golden_registers(adev);
3810
3811         gfx_v9_0_constants_init(adev);
3812
3813         gfx_v9_0_init_tcp_config(adev);
3814
3815         r = adev->gfx.rlc.funcs->resume(adev);
3816         if (r)
3817                 return r;
3818
3819         r = gfx_v9_0_cp_resume(adev);
3820         if (r)
3821                 return r;
3822
3823         return r;
3824 }
3825
3826 static int gfx_v9_0_hw_fini(void *handle)
3827 {
3828         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3829
3830         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3831         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3832         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3833
3834         /* DF freeze and KCQ disable will fail if a RAS interrupt has triggered */
3835         if (!amdgpu_ras_intr_triggered())
3836                 /* disable the KCQ so the CPC stops touching memory that is no longer valid */
3837                 amdgpu_gfx_disable_kcq(adev);
3838
3839         if (amdgpu_sriov_vf(adev)) {
3840                 gfx_v9_0_cp_gfx_enable(adev, false);
3841                 /* Polling must be disabled for SRIOV once the hardware has
3842                  * finished; otherwise the CPC engine may keep fetching a WB
3843                  * address that is no longer valid after the software teardown,
3844                  * triggering a DMAR read error on the hypervisor side.
3845                  */
3846                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3847                 return 0;
3848         }
3849
3850         /* Use the deinitialize sequence from CAIL when unbinding the device
3851          * from the driver, otherwise the KIQ hangs when binding it back.
3852          */
3853         if (!adev->in_gpu_reset && !adev->in_suspend) {
3854                 mutex_lock(&adev->srbm_mutex);
3855                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3856                                 adev->gfx.kiq.ring.pipe,
3857                                 adev->gfx.kiq.ring.queue, 0);
3858                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3859                 soc15_grbm_select(adev, 0, 0, 0, 0);
3860                 mutex_unlock(&adev->srbm_mutex);
3861         }
3862
3863         gfx_v9_0_cp_enable(adev, false);
3864         adev->gfx.rlc.funcs->stop(adev);
3865
3866         return 0;
3867 }
3868
3869 static int gfx_v9_0_suspend(void *handle)
3870 {
3871         return gfx_v9_0_hw_fini(handle);
3872 }
3873
3874 static int gfx_v9_0_resume(void *handle)
3875 {
3876         return gfx_v9_0_hw_init(handle);
3877 }
3878
3879 static bool gfx_v9_0_is_idle(void *handle)
3880 {
3881         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3882
3883         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3884                                 GRBM_STATUS, GUI_ACTIVE))
3885                 return false;
3886         else
3887                 return true;
3888 }
3889
3890 static int gfx_v9_0_wait_for_idle(void *handle)
3891 {
3892         unsigned i;
3893         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3894
3895         for (i = 0; i < adev->usec_timeout; i++) {
3896                 if (gfx_v9_0_is_idle(handle))
3897                         return 0;
3898                 udelay(1);
3899         }
3900         return -ETIMEDOUT;
3901 }
3902
3903 static int gfx_v9_0_soft_reset(void *handle)
3904 {
3905         u32 grbm_soft_reset = 0;
3906         u32 tmp;
3907         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3908
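        /* collect soft-reset bits for whichever blocks GRBM reports busy, then
         * pulse them through GRBM_SOFT_RESET
         */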
3909         /* GRBM_STATUS */
3910         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3911         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3912                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3913                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3914                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3915                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3916                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3917                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3918                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3919                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3920                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3921         }
3922
3923         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3924                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3925                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3926         }
3927
3928         /* GRBM_STATUS2 */
3929         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3930         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3931                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3932                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3933
3934
3935         if (grbm_soft_reset) {
3936                 /* stop the rlc */
3937                 adev->gfx.rlc.funcs->stop(adev);
3938
3939                 if (adev->asic_type != CHIP_ARCTURUS)
3940                         /* Disable GFX parsing/prefetching */
3941                         gfx_v9_0_cp_gfx_enable(adev, false);
3942
3943                 /* Disable MEC parsing/prefetching */
3944                 gfx_v9_0_cp_compute_enable(adev, false);
3945
3946                 if (grbm_soft_reset) {
3947                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3948                         tmp |= grbm_soft_reset;
3949                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3950                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3951                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3952
3953                         udelay(50);
3954
3955                         tmp &= ~grbm_soft_reset;
3956                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3957                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3958                 }
3959
3960                 /* Wait a little for things to settle down */
3961                 udelay(50);
3962         }
3963         return 0;
3964 }
3965
3966 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
3967 {
3968         signed long r, cnt = 0;
3969         unsigned long flags;
3970         uint32_t seq;
3971         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3972         struct amdgpu_ring *ring = &kiq->ring;
3973
3974         BUG_ON(!ring->funcs->emit_rreg);
3975
3976         spin_lock_irqsave(&kiq->ring_lock, flags);
3977         amdgpu_ring_alloc(ring, 32);
3978         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
3979         amdgpu_ring_write(ring, 9 |     /* src: register*/
3980                                 (5 << 8) |      /* dst: memory */
3981                                 (1 << 16) |     /* count sel */
3982                                 (1 << 20));     /* write confirm */
3983         amdgpu_ring_write(ring, 0);
3984         amdgpu_ring_write(ring, 0);
3985         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
3986                                 kiq->reg_val_offs * 4));
3987         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
3988                                 kiq->reg_val_offs * 4));
3989         amdgpu_fence_emit_polling(ring, &seq);
3990         amdgpu_ring_commit(ring);
3991         spin_unlock_irqrestore(&kiq->ring_lock, flags);
3992
3993         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3994
3995         /* Don't keep waiting in the GPU reset case, because that may block
3996          * the gpu_recover() routine forever; e.g. when this virt_kiq_rreg is
3997          * triggered from TTM, ttm_bo_lock_delayed_workqueue() never returns
3998          * if we keep waiting in virt_kiq_rreg, which makes gpu_recover()
3999          * hang there.
4000          *
4001          * Also don't keep waiting when called from IRQ context.
4002          */
4003         if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
4004                 goto failed_kiq_read;
4005
4006         might_sleep();
4007         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4008                 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4009                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4010         }
4011
4012         if (cnt > MAX_KIQ_REG_TRY)
4013                 goto failed_kiq_read;
4014
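        /* the fence signalled, so the 64-bit clock value is now in the two
         * writeback dwords at reg_val_offs
         */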
4015         return (uint64_t)adev->wb.wb[kiq->reg_val_offs] |
4016                 (uint64_t)adev->wb.wb[kiq->reg_val_offs + 1] << 32ULL;
4017
4018 failed_kiq_read:
4019         pr_err("failed to read gpu clock\n");
4020         return ~0;
4021 }
4022
4023 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4024 {
4025         uint64_t clock;
4026
4027         amdgpu_gfx_off_ctrl(adev, false);
4028         mutex_lock(&adev->gfx.gpu_clock_mutex);
4029         if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
4030                 clock = gfx_v9_0_kiq_read_clock(adev);
4031         } else {
4032                 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4033                 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4034                         ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4035         }
4036         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4037         amdgpu_gfx_off_ctrl(adev, true);
4038         return clock;
4039 }
4040
4041 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4042                                           uint32_t vmid,
4043                                           uint32_t gds_base, uint32_t gds_size,
4044                                           uint32_t gws_base, uint32_t gws_size,
4045                                           uint32_t oa_base, uint32_t oa_size)
4046 {
4047         struct amdgpu_device *adev = ring->adev;
4048
4049         /* GDS Base */
4050         gfx_v9_0_write_data_to_reg(ring, 0, false,
4051                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4052                                    gds_base);
4053
4054         /* GDS Size */
4055         gfx_v9_0_write_data_to_reg(ring, 0, false,
4056                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4057                                    gds_size);
4058
4059         /* GWS */
4060         gfx_v9_0_write_data_to_reg(ring, 0, false,
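        /* pack the GWS size and base for this VMID into a single register value */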
4061                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4062                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4063
4064         /* OA */
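        /* (1 << (oa_size + oa_base)) - (1 << oa_base) builds a contiguous mask of
         * oa_size bits starting at bit oa_base
         */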
4065         gfx_v9_0_write_data_to_reg(ring, 0, false,
4066                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4067                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4068 }
4069
4070 static const u32 vgpr_init_compute_shader[] =
4071 {
4072         0xb07c0000, 0xbe8000ff,
4073         0x000000f8, 0xbf110800,
4074         0x7e000280, 0x7e020280,
4075         0x7e040280, 0x7e060280,
4076         0x7e080280, 0x7e0a0280,
4077         0x7e0c0280, 0x7e0e0280,
4078         0x80808800, 0xbe803200,
4079         0xbf84fff5, 0xbf9c0000,
4080         0xd28c0001, 0x0001007f,
4081         0xd28d0001, 0x0002027e,
4082         0x10020288, 0xb8810904,
4083         0xb7814000, 0xd1196a01,
4084         0x00000301, 0xbe800087,
4085         0xbefc00c1, 0xd89c4000,
4086         0x00020201, 0xd89cc080,
4087         0x00040401, 0x320202ff,
4088         0x00000800, 0x80808100,
4089         0xbf84fff8, 0x7e020280,
4090         0xbf810000, 0x00000000,
4091 };
4092
4093 static const u32 sgpr_init_compute_shader[] =
4094 {
4095         0xb07c0000, 0xbe8000ff,
4096         0x0000005f, 0xbee50080,
4097         0xbe812c65, 0xbe822c65,
4098         0xbe832c65, 0xbe842c65,
4099         0xbe852c65, 0xb77c0005,
4100         0x80808500, 0xbf84fff8,
4101         0xbe800080, 0xbf810000,
4102 };
4103
4104 /* When the register arrays below are changed, please update gpr_reg_size
4105  * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
4106  * so that all gfx9 ASICs are covered. */
4107 static const struct soc15_reg_entry vgpr_init_regs[] = {
4108    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4109    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4110    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4111    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4112    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4113    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4114    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4115    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4116    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4117    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4118    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4119    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4120    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4121    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4122 };
4123
4124 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4125    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4126    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4127    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4128    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4129    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4130    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4131    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4132    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4133    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4134    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4135    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4136    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4137    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4138    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4139 };
4140
4141 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4142    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4143    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4144    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4145    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4146    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4147    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4148    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4149    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4150    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4151    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4152    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4153    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4154    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4155    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4156 };
4157
4158 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4159    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4160    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4161    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4162    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4163    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4164    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4165    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4166    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4167    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4168    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4169    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4170    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4171    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4172    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4173    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4174    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4175    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4176    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4177    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4178    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4179    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4180    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4181    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4182    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4183    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4184    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4185    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4186    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4187    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4188    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4189    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4190    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4191    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4192    { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1},
4193 };
4194
4195 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4196 {
4197         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4198         int i, r;
4199
4200         /* only supported when RAS is enabled */
4201         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4202                 return 0;
4203
4204         r = amdgpu_ring_alloc(ring, 7);
4205         if (r) {
4206                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4207                         ring->name, r);
4208                 return r;
4209         }
4210
4211         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4212         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4213
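        /* DMA over the entire VMID0 GDS aperture so every GDS location gets
         * written (EDC workaround)
         */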
4214         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4215         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4216                                 PACKET3_DMA_DATA_DST_SEL(1) |
4217                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4218                                 PACKET3_DMA_DATA_ENGINE(0)));
4219         amdgpu_ring_write(ring, 0);
4220         amdgpu_ring_write(ring, 0);
4221         amdgpu_ring_write(ring, 0);
4222         amdgpu_ring_write(ring, 0);
4223         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4224                                 adev->gds.gds_size);
4225
4226         amdgpu_ring_commit(ring);
4227
4228         for (i = 0; i < adev->usec_timeout; i++) {
4229                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4230                         break;
4231                 udelay(1);
4232         }
4233
4234         if (i >= adev->usec_timeout)
4235                 r = -ETIMEDOUT;
4236
4237         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4238
4239         return r;
4240 }
4241
4242 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4243 {
4244         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4245         struct amdgpu_ib ib;
4246         struct dma_fence *f = NULL;
4247         int r, i;
4248         unsigned total_size, vgpr_offset, sgpr_offset;
4249         u64 gpu_addr;
4250
4251         int compute_dim_x = adev->gfx.config.max_shader_engines *
4252                                                 adev->gfx.config.max_cu_per_sh *
4253                                                 adev->gfx.config.max_sh_per_se;
4254         int sgpr_work_group_size = 5;
4255         int gpr_reg_size = compute_dim_x / 16 + 6;
4256
4257         /* only supported when RAS is enabled */
4258         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4259                 return 0;
4260
4261         /* bail if the compute ring is not ready */
4262         if (!ring->sched.ready)
4263                 return 0;
4264
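        /* Per dispatch: gpr_reg_size SET_SH_REG writes of 3 dwords each, 4 dwords
         * for COMPUTE_PGM_LO/HI, 5 for DISPATCH_DIRECT and 2 for the CS partial
         * flush event, at 4 bytes per dword (matches the IB built below).
         */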
4265         total_size =
4266                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4267         total_size +=
4268                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4269         total_size +=
4270                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4271         total_size = ALIGN(total_size, 256);
4272         vgpr_offset = total_size;
4273         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4274         sgpr_offset = total_size;
4275         total_size += sizeof(sgpr_init_compute_shader);
4276
4277         /* allocate an indirect buffer to put the commands in */
4278         memset(&ib, 0, sizeof(ib));
4279         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4280         if (r) {
4281                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4282                 return r;
4283         }
4284
4285         /* load the compute shaders */
4286         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4287                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4288
4289         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4290                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4291
4292         /* init the ib length to 0 */
4293         ib.length_dw = 0;
4294
4295         /* VGPR */
4296         /* write the register state for the compute dispatch */
4297         for (i = 0; i < gpr_reg_size; i++) {
4298                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4299                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4300                                                                 - PACKET3_SET_SH_REG_START;
4301                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4302         }
4303         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4304         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4305         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4306         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4307                                                         - PACKET3_SET_SH_REG_START;
4308         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4309         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4310
4311         /* write dispatch packet */
4312         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4313         ib.ptr[ib.length_dw++] = compute_dim_x; /* x */
4314         ib.ptr[ib.length_dw++] = 1; /* y */
4315         ib.ptr[ib.length_dw++] = 1; /* z */
4316         ib.ptr[ib.length_dw++] =
4317                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4318
4319         /* write CS partial flush packet */
4320         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4321         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4322
4323         /* SGPR1 */
4324         /* write the register state for the compute dispatch */
4325         for (i = 0; i < gpr_reg_size; i++) {
4326                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4327                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4328                                                                 - PACKET3_SET_SH_REG_START;
4329                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4330         }
4331         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4332         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4333         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4334         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4335                                                         - PACKET3_SET_SH_REG_START;
4336         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4337         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4338
4339         /* write dispatch packet */
4340         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4341         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4342         ib.ptr[ib.length_dw++] = 1; /* y */
4343         ib.ptr[ib.length_dw++] = 1; /* z */
4344         ib.ptr[ib.length_dw++] =
4345                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4346
4347         /* write CS partial flush packet */
4348         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4349         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4350
4351         /* SGPR2 */
4352         /* write the register state for the compute dispatch */
4353         for (i = 0; i < gpr_reg_size; i++) {
4354                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4355                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4356                                                                 - PACKET3_SET_SH_REG_START;
4357                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4358         }
4359         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4360         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4361         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4362         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4363                                                         - PACKET3_SET_SH_REG_START;
4364         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4365         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4366
4367         /* write dispatch packet */
4368         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4369         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4370         ib.ptr[ib.length_dw++] = 1; /* y */
4371         ib.ptr[ib.length_dw++] = 1; /* z */
4372         ib.ptr[ib.length_dw++] =
4373                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4374
4375         /* write CS partial flush packet */
4376         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4377         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4378
4379         /* schedule the IB on the ring */
4380         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4381         if (r) {
4382                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4383                 goto fail;
4384         }
4385
4386         /* wait for the GPU to finish processing the IB */
4387         r = dma_fence_wait(f, false);
4388         if (r) {
4389                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4390                 goto fail;
4391         }
4392
4393         switch (adev->asic_type) {
4395         case CHIP_VEGA20:
4396                 gfx_v9_0_clear_ras_edc_counter(adev);
4397                 break;
4398         case CHIP_ARCTURUS:
4399                 gfx_v9_4_clear_ras_edc_counter(adev);
4400                 break;
4401         default:
4402                 break;
4403         }
4404
4405 fail:
4406         amdgpu_ib_free(adev, &ib, NULL);
4407         dma_fence_put(f);
4408
4409         return r;
4410 }
4411
4412 static int gfx_v9_0_early_init(void *handle)
4413 {
4414         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4415
4416         if (adev->asic_type == CHIP_ARCTURUS)
4417                 adev->gfx.num_gfx_rings = 0;
4418         else
4419                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4420         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4421         gfx_v9_0_set_kiq_pm4_funcs(adev);
4422         gfx_v9_0_set_ring_funcs(adev);
4423         gfx_v9_0_set_irq_funcs(adev);
4424         gfx_v9_0_set_gds_init(adev);
4425         gfx_v9_0_set_rlc_funcs(adev);
4426
4427         return 0;
4428 }
4429
4430 static int gfx_v9_0_ecc_late_init(void *handle)
4431 {
4432         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4433         int r;
4434
4435         /*
4436          * Temporary workaround for an issue where the CP firmware fails to
4437          * update the read pointer while CPDMA writes the clearing operation
4438          * to GDS during the suspend/resume sequence on several cards. So
4439          * limit this operation to the cold-boot sequence.
4440          */
4441         if (!adev->in_suspend) {
4442                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4443                 if (r)
4444                         return r;
4445         }
4446
4447         /* requires IBs so do in late init after IB pool is initialized */
4448         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4449         if (r)
4450                 return r;
4451
4452         r = amdgpu_gfx_ras_late_init(adev);
4453         if (r)
4454                 return r;
4455
4456         return 0;
4457 }
4458
4459 static int gfx_v9_0_late_init(void *handle)
4460 {
4461         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4462         int r;
4463
4464         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4465         if (r)
4466                 return r;
4467
4468         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4469         if (r)
4470                 return r;
4471
4472         r = gfx_v9_0_ecc_late_init(handle);
4473         if (r)
4474                 return r;
4475
4476         return 0;
4477 }
4478
4479 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4480 {
4481         uint32_t rlc_setting;
4482
4483         /* if RLC is not enabled, do nothing */
4484         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4485         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4486                 return false;
4487
4488         return true;
4489 }
4490
4491 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4492 {
4493         uint32_t data;
4494         unsigned i;
4495
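        /* ask the RLC to enter safe mode; it acknowledges by clearing the CMD bit */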
4496         data = RLC_SAFE_MODE__CMD_MASK;
4497         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4498         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4499
4500         /* wait for RLC_SAFE_MODE */
4501         for (i = 0; i < adev->usec_timeout; i++) {
4502                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4503                         break;
4504                 udelay(1);
4505         }
4506 }
4507
4508 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4509 {
4510         uint32_t data;
4511
4512         data = RLC_SAFE_MODE__CMD_MASK;
4513         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4514 }
4515
4516 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4517                                                 bool enable)
4518 {
4519         amdgpu_gfx_rlc_enter_safe_mode(adev);
4520
4521         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4522                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4523                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4524                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4525         } else {
4526                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4527                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4528                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4529         }
4530
4531         amdgpu_gfx_rlc_exit_safe_mode(adev);
4532 }
4533
4534 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4535                                                 bool enable)
4536 {
4537         /* TODO: double check if we need to perform under safe mode */
4538         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4539
4540         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4541                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4542         else
4543                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4544
4545         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4546                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4547         else
4548                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4549
4550         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4551 }
4552
4553 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4554                                                       bool enable)
4555 {
4556         uint32_t data, def;
4557
4558         amdgpu_gfx_rlc_enter_safe_mode(adev);
4559
4560         /* It is disabled by HW by default */
4561         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4562                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4563                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4564
4565                 if (adev->asic_type != CHIP_VEGA12)
4566                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4567
4568                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4569                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4570                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4571
4572                 /* only for Vega10 & Raven1 */
4573                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4574
4575                 if (def != data)
4576                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4577
4578                 /* MGLS is a global flag to control all MGLS in GFX */
4579                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4580                         /* 2 - RLC memory Light sleep */
4581                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4582                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4583                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4584                                 if (def != data)
4585                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4586                         }
4587                         /* 3 - CP memory Light sleep */
4588                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4589                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4590                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4591                                 if (def != data)
4592                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4593                         }
4594                 }
4595         } else {
4596                 /* 1 - MGCG_OVERRIDE */
4597                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4598
4599                 if (adev->asic_type != CHIP_VEGA12)
4600                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4601
4602                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4603                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4604                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4605                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4606
4607                 if (def != data)
4608                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4609
4610                 /* 2 - disable MGLS in RLC */
4611                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4612                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4613                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4614                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4615                 }
4616
4617                 /* 3 - disable MGLS in CP */
4618                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4619                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4620                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4621                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4622                 }
4623         }
4624
4625         amdgpu_gfx_rlc_exit_safe_mode(adev);
4626 }
4627
4628 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4629                                            bool enable)
4630 {
4631         uint32_t data, def;
4632
4633         if (adev->asic_type == CHIP_ARCTURUS)
4634                 return;
4635
4636         amdgpu_gfx_rlc_enter_safe_mode(adev);
4637
4638         /* Enable 3D CGCG/CGLS */
4639         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4640                 /* write cmd to clear cgcg/cgls ov */
4641                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4642                 /* unset CGCG override */
4643                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4644                 /* update CGCG and CGLS override bits */
4645                 if (def != data)
4646                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4647
4648                 /* enable 3Dcgcg FSM(0x0000363f) */
4649                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4650
4651                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4652                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4653                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4654                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4655                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4656                 if (def != data)
4657                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4658
4659                 /* set IDLE_POLL_COUNT(0x00900100) */
4660                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4661                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4662                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4663                 if (def != data)
4664                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4665         } else {
4666                 /* Disable CGCG/CGLS */
4667                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4668                 /* disable cgcg, cgls should be disabled */
4669                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4670                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4671                 /* disable cgcg and cgls in FSM */
4672                 if (def != data)
4673                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4674         }
4675
4676         amdgpu_gfx_rlc_exit_safe_mode(adev);
4677 }
4678
4679 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4680                                                       bool enable)
4681 {
4682         uint32_t def, data;
4683
4684         amdgpu_gfx_rlc_enter_safe_mode(adev);
4685
4686         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4687                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4688                 /* unset CGCG override */
4689                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4690                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4691                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4692                 else
4693                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4694                 /* update CGCG and CGLS override bits */
4695                 if (def != data)
4696                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4697
4698                 /* enable cgcg FSM(0x0000363F) */
4699                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4700
4701                 if (adev->asic_type == CHIP_ARCTURUS)
4702                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4703                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4704                 else
4705                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4706                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4707                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4708                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4709                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4710                 if (def != data)
4711                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4712
4713                 /* set IDLE_POLL_COUNT(0x00900100) */
4714                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4715                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4716                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4717                 if (def != data)
4718                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4719         } else {
4720                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4721                 /* reset CGCG/CGLS bits */
4722                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4723                 /* disable cgcg and cgls in FSM */
4724                 if (def != data)
4725                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4726         }
4727
4728         amdgpu_gfx_rlc_exit_safe_mode(adev);
4729 }
4730
4731 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4732                                             bool enable)
4733 {
4734         if (enable) {
4735                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4736                  * ===  MGCG + MGLS ===
4737                  */
4738                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4739                 /* ===  CGCG /CGLS for GFX 3D Only === */
4740                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4741                 /* ===  CGCG + CGLS === */
4742                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4743         } else {
4744                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4745                  * ===  CGCG + CGLS ===
4746                  */
4747                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4748                 /* ===  CGCG /CGLS for GFX 3D Only === */
4749                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4750                 /* ===  MGCG + MGLS === */
4751                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4752         }
4753         return 0;
4754 }
4755
4756 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4757         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4758         .set_safe_mode = gfx_v9_0_set_safe_mode,
4759         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4760         .init = gfx_v9_0_rlc_init,
4761         .get_csb_size = gfx_v9_0_get_csb_size,
4762         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4763         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4764         .resume = gfx_v9_0_rlc_resume,
4765         .stop = gfx_v9_0_rlc_stop,
4766         .reset = gfx_v9_0_rlc_reset,
4767         .start = gfx_v9_0_rlc_start
4768 };
4769
4770 static int gfx_v9_0_set_powergating_state(void *handle,
4771                                           enum amd_powergating_state state)
4772 {
4773         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4774         bool enable = (state == AMD_PG_STATE_GATE);
4775
4776         switch (adev->asic_type) {
4777         case CHIP_RAVEN:
4778         case CHIP_RENOIR:
4779                 if (!enable) {
4780                         amdgpu_gfx_off_ctrl(adev, false);
4781                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4782                 }
4783                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4784                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4785                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4786                 } else {
4787                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4788                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4789                 }
4790
4791                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4792                         gfx_v9_0_enable_cp_power_gating(adev, true);
4793                 else
4794                         gfx_v9_0_enable_cp_power_gating(adev, false);
4795
4796                 /* update gfx cgpg state */
4797                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4798
4799                 /* update mgcg state */
4800                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4801
4802                 if (enable)
4803                         amdgpu_gfx_off_ctrl(adev, true);
4804                 break;
4805         case CHIP_VEGA12:
4806                 if (!enable) {
4807                         amdgpu_gfx_off_ctrl(adev, false);
4808                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4809                 } else {
4810                         amdgpu_gfx_off_ctrl(adev, true);
4811                 }
4812                 break;
4813         default:
4814                 break;
4815         }
4816
4817         return 0;
4818 }
4819
4820 static int gfx_v9_0_set_clockgating_state(void *handle,
4821                                           enum amd_clockgating_state state)
4822 {
4823         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4824
4825         if (amdgpu_sriov_vf(adev))
4826                 return 0;
4827
4828         switch (adev->asic_type) {
4829         case CHIP_VEGA10:
4830         case CHIP_VEGA12:
4831         case CHIP_VEGA20:
4832         case CHIP_RAVEN:
4833         case CHIP_ARCTURUS:
4834         case CHIP_RENOIR:
4835                 gfx_v9_0_update_gfx_clock_gating(adev,
4836                                                  state == AMD_CG_STATE_GATE);
4837                 break;
4838         default:
4839                 break;
4840         }
4841         return 0;
4842 }
4843
4844 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4845 {
4846         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4847         int data;
4848
4849         if (amdgpu_sriov_vf(adev))
4850                 *flags = 0;
4851
4852         /* AMD_CG_SUPPORT_GFX_MGCG */
4853         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
4854         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4855                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4856
4857         /* AMD_CG_SUPPORT_GFX_CGCG */
4858         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
4859         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4860                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4861
4862         /* AMD_CG_SUPPORT_GFX_CGLS */
4863         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4864                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4865
4866         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4867         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
4868         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4869                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4870
4871         /* AMD_CG_SUPPORT_GFX_CP_LS */
4872         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
4873         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4874                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4875
4876         if (adev->asic_type != CHIP_ARCTURUS) {
4877                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4878                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
4879                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4880                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4881
4882                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4883                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4884                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4885         }
4886 }
4887
4888 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4889 {
4890         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4891 }
4892
4893 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4894 {
4895         struct amdgpu_device *adev = ring->adev;
4896         u64 wptr;
4897
4898         /* XXX check if swapping is necessary on BE */
4899         if (ring->use_doorbell) {
4900                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4901         } else {
4902                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4903                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4904         }
4905
4906         return wptr;
4907 }
4908
4909 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4910 {
4911         struct amdgpu_device *adev = ring->adev;
4912
4913         if (ring->use_doorbell) {
4914                 /* XXX check if swapping is necessary on BE */
4915                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4916                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4917         } else {
4918                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4919                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4920         }
4921 }
4922
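/*
 * Flush HDP from the CP: write the per-engine ref_and_mask bit to the NBIO
 * HDP-flush request register and wait for the same bit in the flush-done
 * register.  Compute rings select the bit by ME/pipe and wait on the ME;
 * the gfx ring uses the cp0 bit and waits on the PFP.
 */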
4923 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4924 {
4925         struct amdgpu_device *adev = ring->adev;
4926         u32 ref_and_mask, reg_mem_engine;
4927         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4928
4929         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4930                 switch (ring->me) {
4931                 case 1:
4932                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4933                         break;
4934                 case 2:
4935                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4936                         break;
4937                 default:
4938                         return;
4939                 }
4940                 reg_mem_engine = 0;
4941         } else {
4942                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4943                 reg_mem_engine = 1; /* pfp */
4944         }
4945
4946         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4947                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4948                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4949                               ref_and_mask, ref_and_mask, 0x20);
4950 }
4951
4952 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4953                                         struct amdgpu_job *job,
4954                                         struct amdgpu_ib *ib,
4955                                         uint32_t flags)
4956 {
4957         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4958         u32 header, control = 0;
4959
4960         if (ib->flags & AMDGPU_IB_FLAG_CE)
4961                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4962         else
4963                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4964
4965         control |= ib->length_dw | (vmid << 24);
4966
4967         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4968                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4969
4970                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4971                         gfx_v9_0_ring_emit_de_meta(ring);
4972         }
4973
4974         amdgpu_ring_write(ring, header);
4975         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4976         amdgpu_ring_write(ring,
4977 #ifdef __BIG_ENDIAN
4978                 (2 << 0) |
4979 #endif
4980                 lower_32_bits(ib->gpu_addr));
4981         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4982         amdgpu_ring_write(ring, control);
4983 }
4984
4985 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4986                                           struct amdgpu_job *job,
4987                                           struct amdgpu_ib *ib,
4988                                           uint32_t flags)
4989 {
4990         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4991         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4992
4993         /* Currently there is a high likelihood of a wave ID mismatch
4994          * between ME and GDS, leading to a hw deadlock, because ME generates
4995          * different wave IDs than the GDS expects. This situation happens
4996          * randomly when at least 5 compute pipes use GDS ordered append.
4997          * The wave IDs generated by ME are also wrong after suspend/resume.
4998          * Those are probably bugs somewhere else in the kernel driver.
4999          *
5000          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5001          * GDS to 0 for this ring (me/pipe).
5002          */
5003         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5004                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5005                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5006                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5007         }
5008
5009         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5010         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5011         amdgpu_ring_write(ring,
5012 #ifdef __BIG_ENDIAN
5013                                 (2 << 0) |
5014 #endif
5015                                 lower_32_bits(ib->gpu_addr));
5016         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5017         amdgpu_ring_write(ring, control);
5018 }
5019
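/*
 * Emit an end-of-pipe fence: a RELEASE_MEM packet that performs the requested
 * cache flush/invalidate (TC writeback only when AMDGPU_FENCE_FLAG_TC_WB_ONLY
 * is set), writes the 32- or 64-bit sequence value to @addr and optionally
 * raises an interrupt (AMDGPU_FENCE_FLAG_INT).
 */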
5020 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5021                                      u64 seq, unsigned flags)
5022 {
5023         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5024         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5025         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5026
5027         /* RELEASE_MEM - flush caches, send int */
5028         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5029         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5030                                                EOP_TC_NC_ACTION_EN) :
5031                                               (EOP_TCL1_ACTION_EN |
5032                                                EOP_TC_ACTION_EN |
5033                                                EOP_TC_WB_ACTION_EN |
5034                                                EOP_TC_MD_ACTION_EN)) |
5035                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5036                                  EVENT_INDEX(5)));
5037         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5038
5039         /*
5040          * the address must be Qword aligned for a 64bit write, and Dword
5041          * aligned when only the low 32 bits are sent (data high discarded)
5042          */
5043         if (write64bit)
5044                 BUG_ON(addr & 0x7);
5045         else
5046                 BUG_ON(addr & 0x3);
5047         amdgpu_ring_write(ring, lower_32_bits(addr));
5048         amdgpu_ring_write(ring, upper_32_bits(addr));
5049         amdgpu_ring_write(ring, lower_32_bits(seq));
5050         amdgpu_ring_write(ring, upper_32_bits(seq));
5051         amdgpu_ring_write(ring, 0);
5052 }
5053
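/*
 * Stall the ring (on the PFP for gfx, the ME for compute) until the fence
 * value at fence_drv.gpu_addr matches sync_seq, i.e. until all previously
 * emitted fences on this ring have signalled.
 */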
5054 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5055 {
5056         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5057         uint32_t seq = ring->fence_drv.sync_seq;
5058         uint64_t addr = ring->fence_drv.gpu_addr;
5059
5060         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5061                               lower_32_bits(addr), upper_32_bits(addr),
5062                               seq, 0xffffffff, 4);
5063 }
5064
5065 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5066                                         unsigned vmid, uint64_t pd_addr)
5067 {
5068         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5069
5070         /* compute doesn't have PFP */
5071         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5072                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5073                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5074                 amdgpu_ring_write(ring, 0x0);
5075         }
5076 }
5077
5078 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5079 {
5080         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5081 }
5082
5083 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5084 {
5085         u64 wptr;
5086
5087         /* XXX check if swapping is necessary on BE */
5088         if (ring->use_doorbell)
5089                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5090         else
5091                 BUG();
5092         return wptr;
5093 }
5094
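/*
 * Program the per-pipe SPI_WCL_PIPE_PERCENT value: full scale when the pipe
 * holds (or regains) its resources, the minimum (0x1) while it is throttled
 * in favour of another pipe's high-priority reservation.
 */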
5095 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5096                                            bool acquire)
5097 {
5098         struct amdgpu_device *adev = ring->adev;
5099         int pipe_num, tmp, reg;
5100         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5101
5102         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5103
5104         /* first me only has 2 entries, GFX and HP3D */
5105         if (ring->me > 0)
5106                 pipe_num -= 2;
5107
5108         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5109         tmp = RREG32(reg);
5110         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5111         WREG32(reg, tmp);
5112 }
5113
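/*
 * Track high-priority reservations in pipe_reserve_bitmap (under
 * pipe_reserve_mutex).  When the last reservation is dropped, every gfx and
 * compute pipe gets its full SPI percentage back; otherwise only pipes that
 * hold a reservation keep it and all others are throttled.
 */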
5114 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5115                                             struct amdgpu_ring *ring,
5116                                             bool acquire)
5117 {
5118         int i, pipe;
5119         bool reserve;
5120         struct amdgpu_ring *iring;
5121
5122         mutex_lock(&adev->gfx.pipe_reserve_mutex);
5123         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5124         if (acquire)
5125                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5126         else
5127                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5128
5129         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5130                 /* Clear all reservations - everyone reacquires all resources */
5131                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5132                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5133                                                        true);
5134
5135                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5136                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5137                                                        true);
5138         } else {
5139                 /* Lower all pipes without a current reservation */
5140                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5141                         iring = &adev->gfx.gfx_ring[i];
5142                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5143                                                            iring->me,
5144                                                            iring->pipe,
5145                                                            0);
5146                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5147                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5148                 }
5149
5150                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5151                         iring = &adev->gfx.compute_ring[i];
5152                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5153                                                            iring->me,
5154                                                            iring->pipe,
5155                                                            0);
5156                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5157                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5158                 }
5159         }
5160
5161         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5162 }
5163
5164 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5165                                       struct amdgpu_ring *ring,
5166                                       bool acquire)
5167 {
5168         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5169         uint32_t queue_priority = acquire ? 0xf : 0x0;
5170
5171         mutex_lock(&adev->srbm_mutex);
5172         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5173
5174         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5175         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5176
5177         soc15_grbm_select(adev, 0, 0, 0, 0);
5178         mutex_unlock(&adev->srbm_mutex);
5179 }
5180
5181 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5182                                                enum drm_sched_priority priority)
5183 {
5184         struct amdgpu_device *adev = ring->adev;
5185         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5186
5187         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5188                 return;
5189
5190         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5191         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5192 }
5193
5194 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5195 {
5196         struct amdgpu_device *adev = ring->adev;
5197
5198         /* XXX check if swapping is necessary on BE */
5199         if (ring->use_doorbell) {
5200                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5201                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5202         } else {
5203                 BUG(); /* only DOORBELL method supported on gfx9 now */
5204         }
5205 }
5206
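/*
 * KIQ fence: only a 32-bit sequence write-back is supported.  Write the
 * sequence with WRITE_DATA and, if AMDGPU_FENCE_FLAG_INT is set, write
 * CPC_INT_STATUS to raise the interrupt (src_id 178).
 */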
5207 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5208                                          u64 seq, unsigned int flags)
5209 {
5210         struct amdgpu_device *adev = ring->adev;
5211
5212         /* we only allocate 32bit for each seq wb address */
5213         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5214
5215         /* write fence seq to the "addr" */
5216         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5217         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5218                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5219         amdgpu_ring_write(ring, lower_32_bits(addr));
5220         amdgpu_ring_write(ring, upper_32_bits(addr));
5221         amdgpu_ring_write(ring, lower_32_bits(seq));
5222
5223         if (flags & AMDGPU_FENCE_FLAG_INT) {
5224                 /* set register to trigger INT */
5225                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5226                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5227                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5228                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5229                 amdgpu_ring_write(ring, 0);
5230                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5231         }
5232 }
5233
5234 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5235 {
5236         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5237         amdgpu_ring_write(ring, 0);
5238 }
5239
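/*
 * Write a zero-initialized v9_ce_ib_state into the ce_payload slot of the
 * context save area (CSA) using a CE WRITE_DATA packet.  Emitted from
 * gfx_v9_ring_emit_cntxcntl() under SR-IOV; gfx_v9_0_ring_emit_de_meta()
 * below does the same for the DE payload and the GDS backup address.
 */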
5240 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5241 {
5242         struct v9_ce_ib_state ce_payload = {0};
5243         uint64_t csa_addr;
5244         int cnt;
5245
5246         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5247         csa_addr = amdgpu_csa_vaddr(ring->adev);
5248
5249         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5250         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5251                                  WRITE_DATA_DST_SEL(8) |
5252                                  WR_CONFIRM) |
5253                                  WRITE_DATA_CACHE_POLICY(0));
5254         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5255         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5256         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5257 }
5258
5259 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5260 {
5261         struct v9_de_ib_state de_payload = {0};
5262         uint64_t csa_addr, gds_addr;
5263         int cnt;
5264
5265         csa_addr = amdgpu_csa_vaddr(ring->adev);
5266         gds_addr = csa_addr + 4096;
5267         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5268         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5269
5270         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5271         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5272         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5273                                  WRITE_DATA_DST_SEL(8) |
5274                                  WR_CONFIRM) |
5275                                  WRITE_DATA_CACHE_POLICY(0));
5276         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5277         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5278         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5279 }
5280
5281 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5282 {
5283         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5284         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5285 }
5286
5287 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5288 {
5289         uint32_t dw2 = 0;
5290
5291         if (amdgpu_sriov_vf(ring->adev))
5292                 gfx_v9_0_ring_emit_ce_meta(ring);
5293
5294         gfx_v9_0_ring_emit_tmz(ring, true);
5295
5296         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5297         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5298                 /* set load_global_config & load_global_uconfig */
5299                 dw2 |= 0x8001;
5300                 /* set load_cs_sh_regs */
5301                 dw2 |= 0x01000000;
5302                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5303                 dw2 |= 0x10002;
5304
5305                 /* set load_ce_ram if preamble presented */
5306                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5307                         dw2 |= 0x10000000;
5308         } else {
5309                 /* still load_ce_ram if this is the first time a preamble is
5310                  * presented, even though no context switch happens.
5311                  */
5312                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5313                         dw2 |= 0x10000000;
5314         }
5315
5316         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5317         amdgpu_ring_write(ring, dw2);
5318         amdgpu_ring_write(ring, 0);
5319 }
5320
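/*
 * Emit a COND_EXEC packet that skips the following DWs when
 * *cond_exe_gpu_addr == 0.  The DW count is not known yet, so a 0x55aa55aa
 * placeholder is written and its ring offset returned;
 * gfx_v9_0_ring_emit_patch_cond_exec() patches in the real count later.
 */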
5321 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5322 {
5323         unsigned ret;
5324         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5325         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5326         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5327         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5328         ret = ring->wptr & ring->buf_mask;
5329         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5330         return ret;
5331 }
5332
5333 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5334 {
5335         unsigned cur;
5336         BUG_ON(offset > ring->buf_mask);
5337         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5338
5339         cur = (ring->wptr & ring->buf_mask) - 1;
5340         if (likely(cur > offset))
5341                 ring->ring[offset] = cur - offset;
5342         else
5343                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5344 }
5345
5346 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5347 {
5348         struct amdgpu_device *adev = ring->adev;
5349         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5350
5351         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5352         amdgpu_ring_write(ring, 0 |     /* src: register */
5353                                 (5 << 8) |      /* dst: memory */
5354                                 (1 << 20));     /* write confirm */
5355         amdgpu_ring_write(ring, reg);
5356         amdgpu_ring_write(ring, 0);
5357         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5358                                 kiq->reg_val_offs * 4));
5359         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5360                                 kiq->reg_val_offs * 4));
5361 }
5362
5363 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5364                                     uint32_t val)
5365 {
5366         uint32_t cmd = 0;
5367
5368         switch (ring->funcs->type) {
5369         case AMDGPU_RING_TYPE_GFX:
5370                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5371                 break;
5372         case AMDGPU_RING_TYPE_KIQ:
5373                 cmd = (1 << 16); /* no inc addr */
5374                 break;
5375         default:
5376                 cmd = WR_CONFIRM;
5377                 break;
5378         }
5379         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5380         amdgpu_ring_write(ring, cmd);
5381         amdgpu_ring_write(ring, reg);
5382         amdgpu_ring_write(ring, 0);
5383         amdgpu_ring_write(ring, val);
5384 }
5385
5386 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5387                                         uint32_t val, uint32_t mask)
5388 {
5389         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5390 }
5391
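/*
 * Newer ME/MEC firmware can do the register write plus wait in a single
 * WAIT_REG_MEM packet; use that when me_fw_write_wait/mec_fw_write_wait
 * says it is supported, otherwise fall back to the generic helper that
 * emits separate write and wait packets.
 */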
5392 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5393                                                   uint32_t reg0, uint32_t reg1,
5394                                                   uint32_t ref, uint32_t mask)
5395 {
5396         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5397         struct amdgpu_device *adev = ring->adev;
5398         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5399                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5400
5401         if (fw_version_ok)
5402                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5403                                       ref, mask, 0x20);
5404         else
5405                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5406                                                            ref, mask);
5407 }
5408
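/*
 * Soft ring recovery: issue an SQ_CMD with CHECK_VMID set so that only the
 * waves belonging to @vmid are killed, avoiding a full GPU reset for a
 * single hung job.
 */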
5409 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5410 {
5411         struct amdgpu_device *adev = ring->adev;
5412         uint32_t value = 0;
5413
5414         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5415         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5416         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5417         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5418         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5419 }
5420
5421 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5422                                                  enum amdgpu_interrupt_state state)
5423 {
5424         switch (state) {
5425         case AMDGPU_IRQ_STATE_DISABLE:
5426         case AMDGPU_IRQ_STATE_ENABLE:
5427                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5428                                TIME_STAMP_INT_ENABLE,
5429                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5430                 break;
5431         default:
5432                 break;
5433         }
5434 }
5435
5436 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5437                                                      int me, int pipe,
5438                                                      enum amdgpu_interrupt_state state)
5439 {
5440         u32 mec_int_cntl, mec_int_cntl_reg;
5441
5442         /*
5443          * amdgpu controls only the first MEC. That's why this function only
5444          * handles the setting of interrupts for this specific MEC. All other
5445          * pipes' interrupts are set by amdkfd.
5446          */
5447
5448         if (me == 1) {
5449                 switch (pipe) {
5450                 case 0:
5451                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5452                         break;
5453                 case 1:
5454                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5455                         break;
5456                 case 2:
5457                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5458                         break;
5459                 case 3:
5460                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5461                         break;
5462                 default:
5463                         DRM_DEBUG("invalid pipe %d\n", pipe);
5464                         return;
5465                 }
5466         } else {
5467                 DRM_DEBUG("invalid me %d\n", me);
5468                 return;
5469         }
5470
5471         switch (state) {
5472         case AMDGPU_IRQ_STATE_DISABLE:
5473                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5474                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5475                                              TIME_STAMP_INT_ENABLE, 0);
5476                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5477                 break;
5478         case AMDGPU_IRQ_STATE_ENABLE:
5479                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5480                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5481                                              TIME_STAMP_INT_ENABLE, 1);
5482                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5483                 break;
5484         default:
5485                 break;
5486         }
5487 }
5488
5489 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5490                                              struct amdgpu_irq_src *source,
5491                                              unsigned type,
5492                                              enum amdgpu_interrupt_state state)
5493 {
5494         switch (state) {
5495         case AMDGPU_IRQ_STATE_DISABLE:
5496         case AMDGPU_IRQ_STATE_ENABLE:
5497                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5498                                PRIV_REG_INT_ENABLE,
5499                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5500                 break;
5501         default:
5502                 break;
5503         }
5504
5505         return 0;
5506 }
5507
5508 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5509                                               struct amdgpu_irq_src *source,
5510                                               unsigned type,
5511                                               enum amdgpu_interrupt_state state)
5512 {
5513         switch (state) {
5514         case AMDGPU_IRQ_STATE_DISABLE:
5515         case AMDGPU_IRQ_STATE_ENABLE:
5516                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5517                                PRIV_INSTR_INT_ENABLE,
5518                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
5519         default:
5520                 break;
5521         }
5522
5523         return 0;
5524 }
5525
5526 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5527         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5528                         CP_ECC_ERROR_INT_ENABLE, 1)
5529
5530 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5531         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5532                         CP_ECC_ERROR_INT_ENABLE, 0)
5533
5534 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5535                                               struct amdgpu_irq_src *source,
5536                                               unsigned type,
5537                                               enum amdgpu_interrupt_state state)
5538 {
5539         switch (state) {
5540         case AMDGPU_IRQ_STATE_DISABLE:
5541                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5542                                 CP_ECC_ERROR_INT_ENABLE, 0);
5543                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5544                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5545                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5546                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5547                 break;
5548
5549         case AMDGPU_IRQ_STATE_ENABLE:
5550                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5551                                 CP_ECC_ERROR_INT_ENABLE, 1);
5552                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5553                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5554                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5555                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5556                 break;
5557         default:
5558                 break;
5559         }
5560
5561         return 0;
5562 }
5563
5564
5565 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5566                                             struct amdgpu_irq_src *src,
5567                                             unsigned type,
5568                                             enum amdgpu_interrupt_state state)
5569 {
5570         switch (type) {
5571         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5572                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5573                 break;
5574         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5575                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5576                 break;
5577         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5578                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5579                 break;
5580         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5581                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5582                 break;
5583         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5584                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5585                 break;
5586         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5587                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5588                 break;
5589         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5590                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5591                 break;
5592         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5593                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5594                 break;
5595         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5596                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5597                 break;
5598         default:
5599                 break;
5600         }
5601         return 0;
5602 }
5603
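/*
 * CP end-of-pipe interrupt handler.  ring_id packs me (bits 3:2),
 * pipe (bits 1:0) and queue (bits 6:4); process the fence of the gfx ring
 * for ME 0 and of the matching compute ring for MEC1/MEC2.
 */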
5604 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5605                             struct amdgpu_irq_src *source,
5606                             struct amdgpu_iv_entry *entry)
5607 {
5608         int i;
5609         u8 me_id, pipe_id, queue_id;
5610         struct amdgpu_ring *ring;
5611
5612         DRM_DEBUG("IH: CP EOP\n");
5613         me_id = (entry->ring_id & 0x0c) >> 2;
5614         pipe_id = (entry->ring_id & 0x03) >> 0;
5615         queue_id = (entry->ring_id & 0x70) >> 4;
5616
5617         switch (me_id) {
5618         case 0:
5619                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5620                 break;
5621         case 1:
5622         case 2:
5623                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5624                         ring = &adev->gfx.compute_ring[i];
5625                         /* Per-queue interrupt is supported for MEC starting from VI.
5626                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
5627                           */
5628                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5629                                 amdgpu_fence_process(ring);
5630                 }
5631                 break;
5632         }
5633         return 0;
5634 }
5635
5636 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5637                            struct amdgpu_iv_entry *entry)
5638 {
5639         u8 me_id, pipe_id, queue_id;
5640         struct amdgpu_ring *ring;
5641         int i;
5642
5643         me_id = (entry->ring_id & 0x0c) >> 2;
5644         pipe_id = (entry->ring_id & 0x03) >> 0;
5645         queue_id = (entry->ring_id & 0x70) >> 4;
5646
5647         switch (me_id) {
5648         case 0:
5649                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5650                 break;
5651         case 1:
5652         case 2:
5653                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5654                         ring = &adev->gfx.compute_ring[i];
5655                         if (ring->me == me_id && ring->pipe == pipe_id &&
5656                             ring->queue == queue_id)
5657                                 drm_sched_fault(&ring->sched);
5658                 }
5659                 break;
5660         }
5661 }
5662
5663 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5664                                  struct amdgpu_irq_src *source,
5665                                  struct amdgpu_iv_entry *entry)
5666 {
5667         DRM_ERROR("Illegal register access in command stream\n");
5668         gfx_v9_0_fault(adev, entry);
5669         return 0;
5670 }
5671
5672 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5673                                   struct amdgpu_irq_src *source,
5674                                   struct amdgpu_iv_entry *entry)
5675 {
5676         DRM_ERROR("Illegal instruction in command stream\n");
5677         gfx_v9_0_fault(adev, entry);
5678         return 0;
5679 }
5680
5681
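/*
 * EDC/ECC counter map used by the gfx RAS error-count path: each entry names
 * a GC block counter register and the fields holding its correctable (SEC)
 * and uncorrectable (DED) counts.  Blocks that only report single-error
 * detection (SED) leave the DED field zero.
 */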
5682 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5683         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5684           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5685           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5686         },
5687         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5688           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5689           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5690         },
5691         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5692           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5693           0, 0
5694         },
5695         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5696           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5697           0, 0
5698         },
5699         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5700           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5701           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5702         },
5703         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5704           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5705           0, 0
5706         },
5707         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5708           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5709           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5710         },
5711         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5712           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5713           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5714         },
5715         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5716           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5717           0, 0
5718         },
5719         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5720           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5721           0, 0
5722         },
5723         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5724           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5725           0, 0
5726         },
5727         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5728           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5729           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5730         },
5731         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5732           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5733           0, 0
5734         },
5735         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5736           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5737           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5738         },
5739         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5740           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5741           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5742           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5743         },
5744         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5745           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5746           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5747           0, 0
5748         },
5749         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5750           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5751           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5752           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5753         },
5754         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5755           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5756           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5757           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5758         },
5759         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5760           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5761           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5762           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5763         },
5764         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5765           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5766           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5767           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5768         },
5769         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5770           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5771           0, 0
5772         },
5773         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5774           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5775           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5776         },
5777         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5778           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5779           0, 0
5780         },
5781         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5782           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5783           0, 0
5784         },
5785         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5786           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5787           0, 0
5788         },
5789         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5790           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5791           0, 0
5792         },
5793         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5794           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5795           0, 0
5796         },
5797         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5798           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5799           0, 0
5800         },
5801         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5802           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5803           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5804         },
5805         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5806           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5807           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5808         },
5809         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5810           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5811           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5812         },
5813         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5814           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5815           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5816         },
5817         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5818           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5819           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5820         },
5821         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5822           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5823           0, 0
5824         },
5825         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5826           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5827           0, 0
5828         },
5829         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5830           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5831           0, 0
5832         },
5833         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5834           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5835           0, 0
5836         },
5837         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5838           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5839           0, 0
5840         },
5841         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5842           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5843           0, 0
5844         },
5845         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5846           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5847           0, 0
5848         },
5849         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5850           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5851           0, 0
5852         },
5853         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5854           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5855           0, 0
5856         },
5857         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5858           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5859           0, 0
5860         },
5861         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5862           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5863           0, 0
5864         },
5865         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5866           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5867           0, 0
5868         },
5869         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5870           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5871           0, 0
5872         },
5873         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5874           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5875           0, 0
5876         },
5877         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5878           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5879           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5880         },
5881         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5882           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5883           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5884         },
5885         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5886           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5887           0, 0
5888         },
5889         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5890           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5891           0, 0
5892         },
5893         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5894           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5895           0, 0
5896         },
5897         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5898           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5899           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5900         },
5901         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5902           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5903           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5904         },
5905         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5906           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5907           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
5908         },
5909         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5910           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5911           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
5912         },
5913         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5914           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
5915           0, 0
5916         },
5917         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5918           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5919           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
5920         },
5921         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5922           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5923           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
5924         },
5925         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5926           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
5927           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
5928         },
5929         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5930           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5931           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
5932         },
5933         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5934           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5935           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
5936         },
5937         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5938           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5939           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
5940         },
5941         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5942           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5943           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
5944         },
5945         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5946           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5947           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
5948         },
5949         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5950           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5951           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
5952         },
5953         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5954           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5955           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
5956         },
5957         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5958           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5959           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
5960         },
5961         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5962           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5963           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
5964         },
5965         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5966           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5967           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
5968         },
5969         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5970           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5971           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
5972         },
5973         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5974           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5975           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
5976         },
5977         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5978           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5979           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
5980         },
5981         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5982           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5983           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
5984         },
5985         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5986           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5987           0, 0
5988         },
5989         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5990           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
5991           0, 0
5992         },
5993         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5994           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
5995           0, 0
5996         },
5997         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5998           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
5999           0, 0
6000         },
6001         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6002           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6003           0, 0
6004         },
6005         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6006           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6007           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6008         },
6009         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6010           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6011           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6012         },
6013         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6014           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6015           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6016         },
6017         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6018           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6019           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6020         },
6021         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6022           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6023           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6024         },
6025         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6026           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6027           0, 0
6028         },
6029         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6030           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6031           0, 0
6032         },
6033         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6034           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6035           0, 0
6036         },
6037         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6038           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6039           0, 0
6040         },
6041         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6042           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6043           0, 0
6044         },
6045         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6046           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6047           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6048         },
6049         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6050           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6051           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6052         },
6053         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6054           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6055           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6056         },
6057         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6058           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6059           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6060         },
6061         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6062           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6063           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6064         },
6065         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6066           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6067           0, 0
6068         },
6069         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6070           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6071           0, 0
6072         },
6073         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6074           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6075           0, 0
6076         },
6077         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6078           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6079           0, 0
6080         },
6081         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6082           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6083           0, 0
6084         },
6085         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6086           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6087           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6088         },
6089         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6090           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6091           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6092         },
6093         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6094           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6095           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6096         },
6097         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6098           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6099           0, 0
6100         },
6101         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6102           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6103           0, 0
6104         },
6105         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6106           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6107           0, 0
6108         },
6109         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6110           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6111           0, 0
6112         },
6113         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6114           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6115           0, 0
6116         },
6117         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6118           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6119           0, 0
6120         }
6121 };
6122
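/*
 * Validate a GFX RAS error-injection request: the GFX block must support
 * RAS, the subblock index must be in range and named, and the requested
 * error type must be supported by both the hardware and the driver.  The
 * request is then forwarded to the PSP RAS TA under grbm_idx_mutex.
 */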
6123 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6124                                      void *inject_if)
6125 {
6126         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6127         int ret;
6128         struct ta_ras_trigger_error_input block_info = { 0 };
6129
6130         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6131                 return -EINVAL;
6132
6133         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6134                 return -EINVAL;
6135
6136         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6137                 return -EPERM;
6138
6139         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6140               info->head.type)) {
6141                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6142                         ras_gfx_subblocks[info->head.sub_block_index].name,
6143                         info->head.type);
6144                 return -EPERM;
6145         }
6146
6147         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6148               info->head.type)) {
6149                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6150                         ras_gfx_subblocks[info->head.sub_block_index].name,
6151                         info->head.type);
6152                 return -EPERM;
6153         }
6154
6155         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6156         block_info.sub_block_index =
6157                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6158         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6159         block_info.address = info->address;
6160         block_info.value = info->value;
6161
6162         mutex_lock(&adev->grbm_idx_mutex);
6163         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6164         mutex_unlock(&adev->grbm_idx_mutex);
6165
6166         return ret;
6167 }
6168
6169 static const char *vml2_mems[] = {
6170         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6171         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6172         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6173         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6174         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6175         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6176         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6177         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6178         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6179         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6180         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6181         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6182         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6183         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6184         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6185         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6186 };
6187
6188 static const char *vml2_walker_mems[] = {
6189         "UTC_VML2_CACHE_PDE0_MEM0",
6190         "UTC_VML2_CACHE_PDE0_MEM1",
6191         "UTC_VML2_CACHE_PDE1_MEM0",
6192         "UTC_VML2_CACHE_PDE1_MEM1",
6193         "UTC_VML2_CACHE_PDE2_MEM0",
6194         "UTC_VML2_CACHE_PDE2_MEM1",
6195         "UTC_VML2_RDIF_LOG_FIFO",
6196 };
6197
6198 static const char *atc_l2_cache_2m_mems[] = {
6199         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6200         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6201         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6202         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6203 };
6204
6205 static const char *atc_l2_cache_4k_mems[] = {
6206         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6207         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6208         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6209         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6210         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6211         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6212         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6213         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6214         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6215         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6216         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6217         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6218         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6219         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6220         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6221         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6222         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6223         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6224         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6225         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6226         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6227         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6228         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6229         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6230         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6231         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6232         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6233         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6234         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6235         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6236         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6237         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6238 };
6239
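/*
 * Query the UTC memories (VML2 bank caches, VML2 walker memories and the
 * ATC L2 2M/4K caches) through their index/count register pairs, adding
 * SEC counts to err_data->ce_count and DED counts to err_data->ue_count,
 * then park the index registers at 255 again.
 */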
6240 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6241                                          struct ras_err_data *err_data)
6242 {
6243         uint32_t i, data;
6244         uint32_t sec_count, ded_count;
6245
6246         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6247         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6248         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6249         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6250         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6251         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6252         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6253         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6254
6255         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6256                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6257                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6258
6259                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6260                 if (sec_count) {
6261                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6262                                  vml2_mems[i], sec_count);
6263                         err_data->ce_count += sec_count;
6264                 }
6265
6266                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6267                 if (ded_count) {
6268                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6269                                  vml2_mems[i], ded_count);
6270                         err_data->ue_count += ded_count;
6271                 }
6272         }
6273
6274         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6275                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6276                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6277
6278                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6279                                                 SEC_COUNT);
6280                 if (sec_count) {
6281                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6282                                  vml2_walker_mems[i], sec_count);
6283                         err_data->ce_count += sec_count;
6284                 }
6285
6286                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6287                                                 DED_COUNT);
6288                 if (ded_count) {
6289                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6290                                  vml2_walker_mems[i], ded_count);
6291                         err_data->ue_count += ded_count;
6292                 }
6293         }
6294
6295         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6296                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6297                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6298
6299                 sec_count = (data & 0x00006000L) >> 0xd;
6300                 if (sec_count) {
6301                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6302                                  atc_l2_cache_2m_mems[i], sec_count);
6303                         err_data->ce_count += sec_count;
6304                 }
6305         }
6306
6307         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6308                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6309                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6310
6311                 sec_count = (data & 0x00006000L) >> 0xd;
6312                 if (sec_count) {
6313                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6314                                  atc_l2_cache_4k_mems[i], sec_count);
6315                         err_data->ce_count += sec_count;
6316                 }
6317
6318                 ded_count = (data & 0x00018000L) >> 0xf;
6319                 if (ded_count) {
6320                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6321                                  atc_l2_cache_4k_mems[i], ded_count);
6322                         err_data->ue_count += ded_count;
6323                 }
6324         }
6325
6326         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6327         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6328         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6329         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6330
6331         return 0;
6332 }
6333
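/*
 * Decode one EDC counter register value: match it against the
 * gfx_v9_0_ras_fields table and add any SEC/DED counts found to the
 * running totals for the given SE/instance.
 */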
6334 static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg,
6335         uint32_t se_id, uint32_t inst_id, uint32_t value,
6336         uint32_t *sec_count, uint32_t *ded_count)
6337 {
6338         uint32_t i;
6339         uint32_t sec_cnt, ded_cnt;
6340
6341         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6342                 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6343                         gfx_v9_0_ras_fields[i].seg != reg->seg ||
6344                         gfx_v9_0_ras_fields[i].inst != reg->inst)
6345                         continue;
6346
6347                 sec_cnt = (value &
6348                                 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6349                                 gfx_v9_0_ras_fields[i].sec_count_shift;
6350                 if (sec_cnt) {
6351                         DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
6352                                 gfx_v9_0_ras_fields[i].name,
6353                                 se_id, inst_id,
6354                                 sec_cnt);
6355                         *sec_count += sec_cnt;
6356                 }
6357
6358                 ded_cnt = (value &
6359                                 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6360                                 gfx_v9_0_ras_fields[i].ded_count_shift;
6361                 if (ded_cnt) {
6362                         DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
6363                                 gfx_v9_0_ras_fields[i].name,
6364                                 se_id, inst_id,
6365                                 ded_cnt);
6366                         *ded_count += ded_cnt;
6367                 }
6368         }
6369
6370         return 0;
6371 }
6372
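/*
 * Reset every GFX EDC counter: the per-SE/instance counter registers are
 * cleared by reading them back, and the indexed UTC (VML2/ATC L2) counters
 * are zeroed and read once per index.
 */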
6373 static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev)
6374 {
6375         int i, j, k;
6376
6377         /* read back registers to clear the counters */
6378         mutex_lock(&adev->grbm_idx_mutex);
6379         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6380                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6381                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6382                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6383                                 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6384                         }
6385                 }
6386         }
6387         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6388         mutex_unlock(&adev->grbm_idx_mutex);
6389
6390         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6391         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6392         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6393         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6394         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6395         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6396         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6397         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6398
6399         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6400                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6401                 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6402         }
6403
6404         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6405                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6406                 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6407         }
6408
6409         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6410                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6411                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6412         }
6413
6414         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6415                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6416                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6417         }
6418
6419         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6420         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6421         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6422         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6423 }
6424
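/*
 * RAS callback: read every GFX EDC counter register across all shader
 * engines and instances, fold the SEC/DED totals into the caller's
 * ras_err_data, then append the UTC (VML2/ATC L2) counters.
 */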
6425 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6426                                           void *ras_error_status)
6427 {
6428         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6429         uint32_t sec_count = 0, ded_count = 0;
6430         uint32_t i, j, k;
6431         uint32_t reg_value;
6432
6433         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6434                 return -EINVAL;
6435
6436         err_data->ue_count = 0;
6437         err_data->ce_count = 0;
6438
6439         mutex_lock(&adev->grbm_idx_mutex);
6440
6441         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6442                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6443                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6444                                 gfx_v9_0_select_se_sh(adev, j, 0, k);
6445                                 reg_value =
6446                                         RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6447                                 if (reg_value)
6448                                         gfx_v9_0_ras_error_count(&gfx_v9_0_edc_counter_regs[i],
6449                                                         j, k, reg_value,
6450                                                         &sec_count, &ded_count);
6451                         }
6452                 }
6453         }
6454
6455         err_data->ce_count += sec_count;
6456         err_data->ue_count += ded_count;
6457
6458         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6459         mutex_unlock(&adev->grbm_idx_mutex);
6460
6461         gfx_v9_0_query_utc_edc_status(adev, err_data);
6462
6463         return 0;
6464 }
6465
6466 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6467         .name = "gfx_v9_0",
6468         .early_init = gfx_v9_0_early_init,
6469         .late_init = gfx_v9_0_late_init,
6470         .sw_init = gfx_v9_0_sw_init,
6471         .sw_fini = gfx_v9_0_sw_fini,
6472         .hw_init = gfx_v9_0_hw_init,
6473         .hw_fini = gfx_v9_0_hw_fini,
6474         .suspend = gfx_v9_0_suspend,
6475         .resume = gfx_v9_0_resume,
6476         .is_idle = gfx_v9_0_is_idle,
6477         .wait_for_idle = gfx_v9_0_wait_for_idle,
6478         .soft_reset = gfx_v9_0_soft_reset,
6479         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6480         .set_powergating_state = gfx_v9_0_set_powergating_state,
6481         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6482 };
6483
6484 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6485         .type = AMDGPU_RING_TYPE_GFX,
6486         .align_mask = 0xff,
6487         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6488         .support_64bit_ptrs = true,
6489         .vmhub = AMDGPU_GFXHUB_0,
6490         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6491         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6492         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6493         .emit_frame_size = /* 242 maximum in total if 16 IBs */
6494                 5 +  /* COND_EXEC */
6495                 7 +  /* PIPELINE_SYNC */
6496                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6497                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6498                 2 + /* VM_FLUSH */
6499                 8 +  /* FENCE for VM_FLUSH */
6500                 20 + /* GDS switch */
6501                 4 + /* double SWITCH_BUFFER,
6502                        the first COND_EXEC jumps to the place just
6503                        prior to this double SWITCH_BUFFER  */
6504                 5 + /* COND_EXEC */
6505                 7 + /* HDP_flush */
6506                 4 + /* VGT_flush */
6507                 14 + /* CE_META */
6508                 31 + /* DE_META */
6509                 3 + /* CNTX_CTRL */
6510                 5 + /* HDP_INVL */
6511                 8 + 8 + /* FENCE x2 */
6512                 2, /* SWITCH_BUFFER */
6513         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6514         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6515         .emit_fence = gfx_v9_0_ring_emit_fence,
6516         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6517         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6518         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6519         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6520         .test_ring = gfx_v9_0_ring_test_ring,
6521         .test_ib = gfx_v9_0_ring_test_ib,
6522         .insert_nop = amdgpu_ring_insert_nop,
6523         .pad_ib = amdgpu_ring_generic_pad_ib,
6524         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6525         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6526         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6527         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6528         .emit_tmz = gfx_v9_0_ring_emit_tmz,
6529         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6530         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6531         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6532         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6533 };
6534
6535 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6536         .type = AMDGPU_RING_TYPE_COMPUTE,
6537         .align_mask = 0xff,
6538         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6539         .support_64bit_ptrs = true,
6540         .vmhub = AMDGPU_GFXHUB_0,
6541         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6542         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6543         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6544         .emit_frame_size =
6545                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6546                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6547                 5 + /* hdp invalidate */
6548                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6549                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6550                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6551                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6552                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6553         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6554         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6555         .emit_fence = gfx_v9_0_ring_emit_fence,
6556         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6557         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6558         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6559         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6560         .test_ring = gfx_v9_0_ring_test_ring,
6561         .test_ib = gfx_v9_0_ring_test_ib,
6562         .insert_nop = amdgpu_ring_insert_nop,
6563         .pad_ib = amdgpu_ring_generic_pad_ib,
6564         .set_priority = gfx_v9_0_ring_set_priority_compute,
6565         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6566         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6567         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6568 };
6569
6570 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6571         .type = AMDGPU_RING_TYPE_KIQ,
6572         .align_mask = 0xff,
6573         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6574         .support_64bit_ptrs = true,
6575         .vmhub = AMDGPU_GFXHUB_0,
6576         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6577         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6578         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6579         .emit_frame_size =
6580                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6581                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6582                 5 + /* hdp invalidate */
6583                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6584                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6585                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6586                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6587                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6588         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6589         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6590         .test_ring = gfx_v9_0_ring_test_ring,
6591         .insert_nop = amdgpu_ring_insert_nop,
6592         .pad_ib = amdgpu_ring_generic_pad_ib,
6593         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6594         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6595         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6596         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6597 };
6598
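/* Attach the KIQ, GFX and compute ring function tables to each ring. */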
6599 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6600 {
6601         int i;
6602
6603         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6604
6605         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6606                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6607
6608         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6609                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6610 }
6611
6612 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6613         .set = gfx_v9_0_set_eop_interrupt_state,
6614         .process = gfx_v9_0_eop_irq,
6615 };
6616
6617 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6618         .set = gfx_v9_0_set_priv_reg_fault_state,
6619         .process = gfx_v9_0_priv_reg_irq,
6620 };
6621
6622 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6623         .set = gfx_v9_0_set_priv_inst_fault_state,
6624         .process = gfx_v9_0_priv_inst_irq,
6625 };
6626
6627 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6628         .set = gfx_v9_0_set_cp_ecc_error_state,
6629         .process = amdgpu_gfx_cp_ecc_error_irq,
6630 };
6631
6632
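/*
 * Register the EOP, privileged-register fault, privileged-instruction
 * fault and CP ECC error interrupt sources.
 */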
6633 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6634 {
6635         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6636         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6637
6638         adev->gfx.priv_reg_irq.num_types = 1;
6639         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6640
6641         adev->gfx.priv_inst_irq.num_types = 1;
6642         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6643
6644         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6645         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6646 }
6647
6648 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6649 {
6650         switch (adev->asic_type) {
6651         case CHIP_VEGA10:
6652         case CHIP_VEGA12:
6653         case CHIP_VEGA20:
6654         case CHIP_RAVEN:
6655         case CHIP_ARCTURUS:
6656         case CHIP_RENOIR:
6657                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6658                 break;
6659         default:
6660                 break;
6661         }
6662 }
6663
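/* Per-ASIC GDS/GWS/OA sizes and the maximum GDS compute wave id. */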
6664 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6665 {
6666         /* init asic gds info */
6667         switch (adev->asic_type) {
6668         case CHIP_VEGA10:
6669         case CHIP_VEGA12:
6670         case CHIP_VEGA20:
6671                 adev->gds.gds_size = 0x10000;
6672                 break;
6673         case CHIP_RAVEN:
6674         case CHIP_ARCTURUS:
6675                 adev->gds.gds_size = 0x1000;
6676                 break;
6677         default:
6678                 adev->gds.gds_size = 0x10000;
6679                 break;
6680         }
6681
6682         switch (adev->asic_type) {
6683         case CHIP_VEGA10:
6684         case CHIP_VEGA20:
6685                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6686                 break;
6687         case CHIP_VEGA12:
6688                 adev->gds.gds_compute_max_wave_id = 0x27f;
6689                 break;
6690         case CHIP_RAVEN:
6691                 if (adev->rev_id >= 0x8)
6692                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6693                 else
6694                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6695                 break;
6696         case CHIP_ARCTURUS:
6697                 adev->gds.gds_compute_max_wave_id = 0xfff;
6698                 break;
6699         default:
6700                 /* this really depends on the chip */
6701                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6702                 break;
6703         }
6704
6705         adev->gds.gws_size = 64;
6706         adev->gds.oa_size = 16;
6707 }
6708
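/*
 * Write the user inactive-CU mask into GC_USER_SHADER_ARRAY_CONFIG for the
 * SE/SH currently selected through the GRBM index.
 */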
6709 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6710                                                  u32 bitmap)
6711 {
6712         u32 data;
6713
6714         if (!bitmap)
6715                 return;
6716
6717         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6718         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6719
6720         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6721 }
6722
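/*
 * Combine the fuse-level and user-level inactive-CU masks and return the
 * active-CU bitmap for the currently selected SE/SH.
 */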
6723 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6724 {
6725         u32 data, mask;
6726
6727         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6728         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6729
6730         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6731         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6732
6733         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6734
6735         return (~data) & mask;
6736 }
6737
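/*
 * Populate cu_info: per-SE/SH active-CU bitmaps (remapped into the 4x4
 * array for Arcturus), the always-on CU mask and the total active CU
 * count, honouring CUs disabled via amdgpu_gfx_parse_disable_cu().
 */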
6738 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6739                                  struct amdgpu_cu_info *cu_info)
6740 {
6741         int i, j, k, counter, active_cu_number = 0;
6742         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6743         unsigned disable_masks[4 * 4];
6744
6745         if (!adev || !cu_info)
6746                 return -EINVAL;
6747
6748         /*
6749          * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
6750          */
6751         if (adev->gfx.config.max_shader_engines *
6752                 adev->gfx.config.max_sh_per_se > 16)
6753                 return -EINVAL;
6754
6755         amdgpu_gfx_parse_disable_cu(disable_masks,
6756                                     adev->gfx.config.max_shader_engines,
6757                                     adev->gfx.config.max_sh_per_se);
6758
6759         mutex_lock(&adev->grbm_idx_mutex);
6760         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6761                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6762                         mask = 1;
6763                         ao_bitmap = 0;
6764                         counter = 0;
6765                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6766                         gfx_v9_0_set_user_cu_inactive_bitmap(
6767                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6768                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6769
6770                         /*
6771                          * The bitmap (and ao_cu_bitmap) in the cu_info structure
6772                          * is a 4x4 array, which suits Vega ASICs with their
6773                          * 4*2 SE/SH layout.
6774                          * Arcturus, however, uses an 8*1 SE/SH layout.
6775                          * To minimize the impact, we map it onto the existing
6776                          * bitmap array as below:
6777                          *    SE4,SH0 --> bitmap[0][1]
6778                          *    SE5,SH0 --> bitmap[1][1]
6779                          *    SE6,SH0 --> bitmap[2][1]
6780                          *    SE7,SH0 --> bitmap[3][1]
6781                          */
6782                         cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6783
6784                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6785                                 if (bitmap & mask) {
6786                                         if (counter < adev->gfx.config.max_cu_per_sh)
6787                                                 ao_bitmap |= mask;
6788                                         counter++;
6789                                 }
6790                                 mask <<= 1;
6791                         }
6792                         active_cu_number += counter;
6793                         if (i < 2 && j < 2)
6794                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6795                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6796                 }
6797         }
6798         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6799         mutex_unlock(&adev->grbm_idx_mutex);
6800
6801         cu_info->number = active_cu_number;
6802         cu_info->ao_cu_mask = ao_cu_mask;
6803         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6804
6805         return 0;
6806 }
6807
6808 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6809 {
6810         .type = AMD_IP_BLOCK_TYPE_GFX,
6811         .major = 9,
6812         .minor = 0,
6813         .rev = 0,
6814         .funcs = &gfx_v9_0_ip_funcs,
6815 };