drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  (linux-2.6-microblaze.git, blob 1081fa3d4b0f44ba389442a251b6fa0ded555b86)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
42
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48
49 #include "amdgpu_ras.h"
50
51 #include "gfx_v9_4.h"
52
53 #define GFX9_NUM_GFX_RINGS     1
54 #define GFX9_MEC_HPD_SIZE 4096
55 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
56 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
57
58 #define mmPWR_MISC_CNTL_STATUS                                  0x0183
59 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
62 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
63 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L
64
65 #define mmGCEA_PROBE_MAP                        0x070c
66 #define mmGCEA_PROBE_MAP_BASE_IDX               0
67
68 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
74
75 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
81
82 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
88
89 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
90 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
91 MODULE_FIRMWARE("amdgpu/raven_me.bin");
92 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
94 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
95
96 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
103
104 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
110 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
111
112 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
113 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
114 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
115
116 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
120 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
121 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
122
123 #define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
124 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
125 #define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
126 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
127 #define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
128 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
129 #define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
130 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
131 #define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
132 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
133 #define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
134 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0
135
136 enum ta_ras_gfx_subblock {
137         /*CPC*/
138         TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
139         TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
140         TA_RAS_BLOCK__GFX_CPC_UCODE,
141         TA_RAS_BLOCK__GFX_DC_STATE_ME1,
142         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
143         TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
144         TA_RAS_BLOCK__GFX_DC_STATE_ME2,
145         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
146         TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
147         TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
148         /* CPF*/
149         TA_RAS_BLOCK__GFX_CPF_INDEX_START,
150         TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
151         TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
152         TA_RAS_BLOCK__GFX_CPF_TAG,
153         TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
154         /* CPG*/
155         TA_RAS_BLOCK__GFX_CPG_INDEX_START,
156         TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
157         TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
158         TA_RAS_BLOCK__GFX_CPG_TAG,
159         TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
160         /* GDS*/
161         TA_RAS_BLOCK__GFX_GDS_INDEX_START,
162         TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
163         TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
164         TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
165         TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
166         TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
167         TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
168         /* SPI*/
169         TA_RAS_BLOCK__GFX_SPI_SR_MEM,
170         /* SQ*/
171         TA_RAS_BLOCK__GFX_SQ_INDEX_START,
172         TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
173         TA_RAS_BLOCK__GFX_SQ_LDS_D,
174         TA_RAS_BLOCK__GFX_SQ_LDS_I,
175         TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
176         TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
177         /* SQC (3 ranges)*/
178         TA_RAS_BLOCK__GFX_SQC_INDEX_START,
179         /* SQC range 0*/
180         TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
181         TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
182                 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
183         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
184         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
185         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
186         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
187         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
188         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
189         TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
190                 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
191         /* SQC range 1*/
192         TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
193         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
194                 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
195         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
196         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
197         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
198         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
199         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
200         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
201         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
202         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
203         TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
204                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
205         /* SQC range 2*/
206         TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
207         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
208                 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
209         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
210         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
211         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
212         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
213         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
214         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
215         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
216         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
217         TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
218                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
219         TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
220         /* TA*/
221         TA_RAS_BLOCK__GFX_TA_INDEX_START,
222         TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
223         TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
224         TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
225         TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
226         TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
227         TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
228         /* TCA*/
229         TA_RAS_BLOCK__GFX_TCA_INDEX_START,
230         TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
231         TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
232         TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
233         /* TCC (5 sub-ranges)*/
234         TA_RAS_BLOCK__GFX_TCC_INDEX_START,
235         /* TCC range 0*/
236         TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
237         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
238         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
239         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
240         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
241         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
242         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
243         TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
244         TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
245         TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
246         /* TCC range 1*/
247         TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
248         TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
249         TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
250         TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
251                 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
252         /* TCC range 2*/
253         TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
254         TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
255         TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
256         TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
257         TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
258         TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
259         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
260         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
261         TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
262         TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
263                 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
264         /* TCC range 3*/
265         TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
266         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
267         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
268         TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
269                 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
270         /* TCC range 4*/
271         TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
272         TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
273                 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
274         TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
275         TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
276                 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
277         TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
278         /* TCI*/
279         TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
280         /* TCP*/
281         TA_RAS_BLOCK__GFX_TCP_INDEX_START,
282         TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
283         TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
284         TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
285         TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
286         TA_RAS_BLOCK__GFX_TCP_DB_RAM,
287         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
288         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
289         TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
290         /* TD*/
291         TA_RAS_BLOCK__GFX_TD_INDEX_START,
292         TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
293         TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
294         TA_RAS_BLOCK__GFX_TD_CS_FIFO,
295         TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
296         /* EA (3 sub-ranges)*/
297         TA_RAS_BLOCK__GFX_EA_INDEX_START,
298         /* EA range 0*/
299         TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
300         TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
301         TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
302         TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
303         TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
304         TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
305         TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
306         TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
307         TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
308         TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
309         /* EA range 1*/
310         TA_RAS_BLOCK__GFX_EA_INDEX1_START,
311         TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
312         TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
313         TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
314         TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
315         TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
316         TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
317         TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
318         TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
319         /* EA range 2*/
320         TA_RAS_BLOCK__GFX_EA_INDEX2_START,
321         TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
322         TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
323         TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
324         TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
325         TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
326         TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
327         /* UTC VM L2 bank*/
328         TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
329         /* UTC VM walker*/
330         TA_RAS_BLOCK__UTC_VML2_WALKER,
331         /* UTC ATC L2 2MB cache*/
332         TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
333         /* UTC ATC L2 4KB cache*/
334         TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
335         TA_RAS_BLOCK__GFX_MAX
336 };
337
338 struct ras_gfx_subblock {
339         unsigned char *name;
340         int ta_subblock;
341         int hw_supported_error_type;
342         int sw_supported_error_type;
343 };
344
345 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
346         [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
347                 #subblock,                                                     \
348                 TA_RAS_BLOCK__##subblock,                                      \
349                 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
350                 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
351         }
352
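/*
 * Note on AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h): the eight
 * flags are packed into the two bitfields of struct ras_gfx_subblock as
 *   hw_supported_error_type = a | (b << 1) | (c << 2) | (d << 3)
 *   sw_supported_error_type = g | (e << 1) | (h << 2) | (f << 3)
 * Each position is a per-error-type capability bit defined by the RAS TA
 * interface; the exact meaning of each bit is not spelled out in this file.
 */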
353 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
354         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
355         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
356         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
357         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
358         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
359         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
360         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
361         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
362         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
363         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
364         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
365         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
366         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
367         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
368         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
369         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
370         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
371                              0),
372         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
373                              0),
374         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
375         AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
376         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
377         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
378         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
379         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
380         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
381         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
382                              0, 0),
383         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
384                              0),
385         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
386                              0, 0),
387         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
388                              0),
389         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
390                              0, 0),
391         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
392                              0),
393         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
394                              1),
395         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
396                              0, 0, 0),
397         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
398                              0),
399         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
400                              0),
401         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
402                              0),
403         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
404                              0),
405         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
406                              0),
407         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
408                              0, 0),
409         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
410                              0),
411         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
412                              0),
413         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
414                              0, 0, 0),
415         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
416                              0),
417         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
418                              0),
419         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
420                              0),
421         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
422                              0),
423         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
424                              0),
425         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
426                              0, 0),
427         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
428                              0),
429         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
430         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
431         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
432         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
433         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
434         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
435         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
436         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
437         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
438                              1),
439         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
440                              1),
441         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
442                              1),
443         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
444                              0),
445         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
446                              0),
447         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
448         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
449         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
450         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
451         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
452         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
453         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
454         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
455         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
456         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
457         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
458         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
459                              0),
460         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
461         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
462                              0),
463         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
464                              0, 0),
465         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
466                              0),
467         AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
468         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
469         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
470         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
471         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
472         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
473         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
474         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
475         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
476         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
477         AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
478         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
479         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
480         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
481         AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
482         AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
483         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
484         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
485         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
486         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
487         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
488         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
489         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
490         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
491         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
492         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
493         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
494         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
495         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
496         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
497         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
498         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
499         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
500         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
501 };
502
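/*
 * "Golden" register tables: each SOC15_REG_GOLDEN_VALUE() entry names a GC
 * register together with an AND mask and an OR value.  When a table is
 * applied via soc15_program_register_sequence() (see
 * gfx_v9_0_init_golden_registers() below), the bits covered by the mask are
 * presumably cleared and replaced with the OR value, leaving the remaining
 * fields of the register untouched.
 */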
503 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
504 {
505         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
506         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
507         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
508         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
509         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
510         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
511         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
512         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
513         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
514         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
515         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
516         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
517         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
518         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
519         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
520         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
521         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
522         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
523         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
524         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
525 };
526
527 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
528 {
529         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
530         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
531         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
532         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
533         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
534         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
535         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
536         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
537         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
538         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
539         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
540         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
541         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
542         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
543         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
544         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
545         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
546         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
547 };
548
549 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
550 {
551         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
552         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
553         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
554         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
555         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
556         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
557         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
558         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
559         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
560         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
561         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
562 };
563
564 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
565 {
566         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
567         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
568         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
569         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
570         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
571         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
572         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
573         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
574         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
575         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
576         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
577         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
578         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
579         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
580         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
581         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
582         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
583         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
584         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
585         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
586         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
587         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
588         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
589         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
590 };
591
592 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
593 {
594         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
595         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
596         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
597         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
598         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
599         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
600         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
601 };
602
603 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
604 {
605         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
606         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
607         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
608         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
609         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
610         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
611         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
612         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
613         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
614         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
615         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
616         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
617         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
618         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
619         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
620         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
621         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
622         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
623         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
624 };
625
626 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
627 {
628         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
629         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
630         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
631         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
632         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
633         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
634         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
635         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
636         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
637         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
638         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
639         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
640 };
641
642 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
643 {
644         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
645         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
646         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
647 };
648
649 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
650 {
651         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
652         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
653         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
654         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
655         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
656         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
657         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
658         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
659         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
660         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
661         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
662         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
663         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
664         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
665         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
666         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
667 };
668
669 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
670 {
671         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
672         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
673         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
674         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
675         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
676         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
677         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
678         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
679         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
680         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
681         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
682         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
683         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
684 };
685
686 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
687 {
688         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
689         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
690         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
691         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
692         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
693         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
694         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
695         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
696         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
697         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
698 };
699
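/*
 * Offsets of the eight RLC_SRM_INDEX_CNTL_ADDR/DATA registers relative to
 * their _0 instance, so entry i can simply be added to the _0 register
 * offset to address instance i.
 */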
700 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
701 {
702         mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
703         mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
704         mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
705         mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
706         mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
707         mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
708         mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
709         mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
710 };
711
712 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
713 {
714         mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
715         mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
716         mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
717         mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
718         mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
719         mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
720         mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
721         mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
722 };
723
724 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
725 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
726 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
727 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
728
729 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
730 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
731 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
732 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
733 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
734                                  struct amdgpu_cu_info *cu_info);
735 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
736 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
737 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
738 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
739 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
740                                           void *ras_error_status);
741 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
742                                      void *inject_if);
743 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
744
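/*
 * Build a PM4 SET_RESOURCES packet on the KIQ ring: queue type 0 (KIQ), a
 * 64-bit mask of the compute queues owned by the KIQ, and zeroed GWS/OAC/GDS
 * allocations.  Eight ring writes in total, matching set_resources_size in
 * gfx_v9_0_kiq_pm4_funcs below.
 */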
745 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
746                                 uint64_t queue_mask)
747 {
748         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
749         amdgpu_ring_write(kiq_ring,
750                 PACKET3_SET_RESOURCES_VMID_MASK(0) |
751                 /* vmid_mask:0 queue_type:0 (KIQ) */
752                 PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
753         amdgpu_ring_write(kiq_ring,
754                         lower_32_bits(queue_mask));     /* queue mask lo */
755         amdgpu_ring_write(kiq_ring,
756                         upper_32_bits(queue_mask));     /* queue mask hi */
757         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
758         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
759         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
760         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
761 }
762
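/*
 * Build a PM4 MAP_QUEUES packet asking the KIQ to map @ring's compute queue:
 * the me/pipe/queue selection, the doorbell offset, and the GPU addresses of
 * the MQD and of the write-pointer polling location.  ENGINE_SEL is 4 for a
 * GFX ring and 0 for compute, per the eng_sel logic below.
 */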
763 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
764                                  struct amdgpu_ring *ring)
765 {
766         struct amdgpu_device *adev = kiq_ring->adev;
767         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
768         uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
769         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
770
771         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
772         /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
773         amdgpu_ring_write(kiq_ring,
774                          PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
775                          PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
776                          PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
777                          PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
778                          PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
779                          /*queue_type: normal compute queue */
780                          PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
781                          /* alloc format: all_on_one_pipe */
782                          PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
783                          PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
784                          /* num_queues: must be 1 */
785                          PACKET3_MAP_QUEUES_NUM_QUEUES(1));
786         amdgpu_ring_write(kiq_ring,
787                         PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
788         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
789         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
790         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
791         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
792 }
793
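/*
 * Build a PM4 UNMAP_QUEUES packet for @ring.  For PREEMPT_QUEUES_NO_UNMAP
 * the trailing dwords carry a GPU address and sequence value (used as a
 * completion fence for the preemption); otherwise they are written as zero.
 */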
794 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
795                                    struct amdgpu_ring *ring,
796                                    enum amdgpu_unmap_queues_action action,
797                                    u64 gpu_addr, u64 seq)
798 {
799         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
800
801         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
802         amdgpu_ring_write(kiq_ring, /* action, queue_sel: 0, eng_sel, num_queues: 1 */
803                           PACKET3_UNMAP_QUEUES_ACTION(action) |
804                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
805                           PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
806                           PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
807         amdgpu_ring_write(kiq_ring,
808                         PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
809
810         if (action == PREEMPT_QUEUES_NO_UNMAP) {
811                 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
812                 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
813                 amdgpu_ring_write(kiq_ring, seq);
814         } else {
815                 amdgpu_ring_write(kiq_ring, 0);
816                 amdgpu_ring_write(kiq_ring, 0);
817                 amdgpu_ring_write(kiq_ring, 0);
818         }
819 }
820
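/*
 * Build a PM4 QUERY_STATUS packet asking the KIQ to report the status of the
 * queue behind @ring's doorbell, writing the result back to @addr with
 * sequence value @seq (COMMAND(2) appears to select that write-back
 * behaviour in the packet definition used here).
 */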
821 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
822                                    struct amdgpu_ring *ring,
823                                    u64 addr,
824                                    u64 seq)
825 {
826         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
827
828         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
829         amdgpu_ring_write(kiq_ring,
830                           PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
831                           PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
832                           PACKET3_QUERY_STATUS_COMMAND(2));
833         /* doorbell offset and engine select for the queue being queried */
834         amdgpu_ring_write(kiq_ring,
835                         PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
836                         PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
837         amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
838         amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
839         amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
840         amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
841 }
842
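/*
 * Build a PM4 INVALIDATE_TLBS packet: flush the VM TLB entries for @pasid
 * with the given @flush_type, on all hubs when @all_hub is set.
 */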
843 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
844                                 uint16_t pasid, uint32_t flush_type,
845                                 bool all_hub)
846 {
847         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
848         amdgpu_ring_write(kiq_ring,
849                         PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
850                         PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
851                         PACKET3_INVALIDATE_TLBS_PASID(pasid) |
852                         PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
853 }
854
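/*
 * The *_size fields are the packet lengths in dwords emitted by the helpers
 * above (header included): 8 for SET_RESOURCES, 7 for MAP_QUEUES, 6 for
 * UNMAP_QUEUES, 7 for QUERY_STATUS and 2 for INVALIDATE_TLBS, presumably so
 * callers can reserve the right amount of KIQ ring space up front.
 */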
855 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
856         .kiq_set_resources = gfx_v9_0_kiq_set_resources,
857         .kiq_map_queues = gfx_v9_0_kiq_map_queues,
858         .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
859         .kiq_query_status = gfx_v9_0_kiq_query_status,
860         .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
861         .set_resources_size = 8,
862         .map_queues_size = 7,
863         .unmap_queues_size = 6,
864         .query_status_size = 7,
865         .invalidate_tlbs_size = 2,
866 };
867
868 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
869 {
870         adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
871 }
872
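/*
 * Program the per-ASIC golden register tables, then the gc_9_x common table
 * for everything except Arcturus; Renoir returns early because it does not
 * take the common settings.
 */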
873 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
874 {
875         switch (adev->asic_type) {
876         case CHIP_VEGA10:
877                 soc15_program_register_sequence(adev,
878                                                 golden_settings_gc_9_0,
879                                                 ARRAY_SIZE(golden_settings_gc_9_0));
880                 soc15_program_register_sequence(adev,
881                                                 golden_settings_gc_9_0_vg10,
882                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
883                 break;
884         case CHIP_VEGA12:
885                 soc15_program_register_sequence(adev,
886                                                 golden_settings_gc_9_2_1,
887                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
888                 soc15_program_register_sequence(adev,
889                                                 golden_settings_gc_9_2_1_vg12,
890                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
891                 break;
892         case CHIP_VEGA20:
893                 soc15_program_register_sequence(adev,
894                                                 golden_settings_gc_9_0,
895                                                 ARRAY_SIZE(golden_settings_gc_9_0));
896                 soc15_program_register_sequence(adev,
897                                                 golden_settings_gc_9_0_vg20,
898                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
899                 break;
900         case CHIP_ARCTURUS:
901                 soc15_program_register_sequence(adev,
902                                                 golden_settings_gc_9_4_1_arct,
903                                                 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
904                 break;
905         case CHIP_RAVEN:
906                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
907                                                 ARRAY_SIZE(golden_settings_gc_9_1));
908                 if (adev->rev_id >= 8)
909                         soc15_program_register_sequence(adev,
910                                                         golden_settings_gc_9_1_rv2,
911                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
912                 else
913                         soc15_program_register_sequence(adev,
914                                                         golden_settings_gc_9_1_rv1,
915                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
916                 break;
917         case CHIP_RENOIR:
918                 soc15_program_register_sequence(adev,
919                                                 golden_settings_gc_9_1_rn,
920                                                 ARRAY_SIZE(golden_settings_gc_9_1_rn));
921                 return; /* for renoir, don't need common goldensetting */
922         default:
923                 break;
924         }
925
926         if (adev->asic_type != CHIP_ARCTURUS)
927                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
928                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
929 }
930
931 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
932 {
933         adev->gfx.scratch.num_reg = 8;
934         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
935         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
936 }
937
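/*
 * Emit a PM4 WRITE_DATA packet that writes @val to register offset @reg
 * (DST_SEL(0) selects the register path), optionally requesting a write
 * confirmation when @wc is set.
 */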
938 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
939                                        bool wc, uint32_t reg, uint32_t val)
940 {
941         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
942         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
943                                 WRITE_DATA_DST_SEL(0) |
944                                 (wc ? WR_CONFIRM : 0));
945         amdgpu_ring_write(ring, reg);
946         amdgpu_ring_write(ring, 0);
947         amdgpu_ring_write(ring, val);
948 }
949
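/*
 * Emit a PM4 WAIT_REG_MEM packet: poll a register (@mem_space == 0) or a
 * dword-aligned memory location (@mem_space == 1, address in addr0/addr1)
 * until (value & @mask) equals @ref, re-checking every @inv poll-interval
 * units.
 */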
950 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
951                                   int mem_space, int opt, uint32_t addr0,
952                                   uint32_t addr1, uint32_t ref, uint32_t mask,
953                                   uint32_t inv)
954 {
955         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
956         amdgpu_ring_write(ring,
957                                  /* memory (1) or register (0) */
958                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
959                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
960                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
961                                  WAIT_REG_MEM_ENGINE(eng_sel)));
962
963         if (mem_space)
964                 BUG_ON(addr0 & 0x3); /* Dword align */
965         amdgpu_ring_write(ring, addr0);
966         amdgpu_ring_write(ring, addr1);
967         amdgpu_ring_write(ring, ref);
968         amdgpu_ring_write(ring, mask);
969         amdgpu_ring_write(ring, inv); /* poll interval */
970 }
971
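/*
 * Basic ring sanity test: write 0xCAFEDEAD to a scratch register, submit a
 * SET_UCONFIG_REG packet that stores 0xDEADBEEF there, and poll (up to
 * adev->usec_timeout microseconds) for the new value to appear.
 */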
972 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
973 {
974         struct amdgpu_device *adev = ring->adev;
975         uint32_t scratch;
976         uint32_t tmp = 0;
977         unsigned i;
978         int r;
979
980         r = amdgpu_gfx_scratch_get(adev, &scratch);
981         if (r)
982                 return r;
983
984         WREG32(scratch, 0xCAFEDEAD);
985         r = amdgpu_ring_alloc(ring, 3);
986         if (r)
987                 goto error_free_scratch;
988
989         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
990         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
991         amdgpu_ring_write(ring, 0xDEADBEEF);
992         amdgpu_ring_commit(ring);
993
994         for (i = 0; i < adev->usec_timeout; i++) {
995                 tmp = RREG32(scratch);
996                 if (tmp == 0xDEADBEEF)
997                         break;
998                 udelay(1);
999         }
1000
1001         if (i >= adev->usec_timeout)
1002                 r = -ETIMEDOUT;
1003
1004 error_free_scratch:
1005         amdgpu_gfx_scratch_free(adev, scratch);
1006         return r;
1007 }
1008
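/*
 * Indirect buffer test: same idea as gfx_v9_0_ring_test_ring(), but the
 * magic value is written through an IB instead of directly on the ring.  A
 * writeback slot is seeded with 0xCAFEDEAD, a five-dword WRITE_DATA packet
 * (DST_SEL(5) = memory) writes 0xDEADBEEF to its GPU address, and the
 * fence wait plus the final readback confirm the CP executed the IB.
 */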
1009 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1010 {
1011         struct amdgpu_device *adev = ring->adev;
1012         struct amdgpu_ib ib;
1013         struct dma_fence *f = NULL;
1014
1015         unsigned index;
1016         uint64_t gpu_addr;
1017         uint32_t tmp;
1018         long r;
1019
1020         r = amdgpu_device_wb_get(adev, &index);
1021         if (r)
1022                 return r;
1023
1024         gpu_addr = adev->wb.gpu_addr + (index * 4);
1025         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1026         memset(&ib, 0, sizeof(ib));
1027         r = amdgpu_ib_get(adev, NULL, 16, &ib);
1028         if (r)
1029                 goto err1;
1030
1031         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1032         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1033         ib.ptr[2] = lower_32_bits(gpu_addr);
1034         ib.ptr[3] = upper_32_bits(gpu_addr);
1035         ib.ptr[4] = 0xDEADBEEF;
1036         ib.length_dw = 5;
1037
1038         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1039         if (r)
1040                 goto err2;
1041
1042         r = dma_fence_wait_timeout(f, false, timeout);
1043         if (r == 0) {
1044                 r = -ETIMEDOUT;
1045                 goto err2;
1046         } else if (r < 0) {
1047                 goto err2;
1048         }
1049
1050         tmp = adev->wb.wb[index];
1051         if (tmp == 0xDEADBEEF)
1052                 r = 0;
1053         else
1054                 r = -EINVAL;
1055
1056 err2:
1057         amdgpu_ib_free(adev, &ib, NULL);
1058         dma_fence_put(f);
1059 err1:
1060         amdgpu_device_wb_free(adev, index);
1061         return r;
1062 }
1063
1064
1065 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1066 {
1067         release_firmware(adev->gfx.pfp_fw);
1068         adev->gfx.pfp_fw = NULL;
1069         release_firmware(adev->gfx.me_fw);
1070         adev->gfx.me_fw = NULL;
1071         release_firmware(adev->gfx.ce_fw);
1072         adev->gfx.ce_fw = NULL;
1073         release_firmware(adev->gfx.rlc_fw);
1074         adev->gfx.rlc_fw = NULL;
1075         release_firmware(adev->gfx.mec_fw);
1076         adev->gfx.mec_fw = NULL;
1077         release_firmware(adev->gfx.mec2_fw);
1078         adev->gfx.mec2_fw = NULL;
1079
1080         kfree(adev->gfx.rlc.register_list_format);
1081 }
1082
1083 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1084 {
1085         const struct rlc_firmware_header_v2_1 *rlc_hdr;
1086
1087         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1088         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1089         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1090         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1091         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1092         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1093         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1094         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1095         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1096         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1097         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1098         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1099         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1100         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1101                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1102 }
1103
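/*
 * Set adev->gfx.me_fw_write_wait / mec_fw_write_wait when the loaded CP
 * firmware meets the per-ASIC minimum ucode and feature versions checked
 * below; firmware older than the versions in the first check also triggers
 * a one-time "version too old" warning.
 */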
1104 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1105 {
1106         adev->gfx.me_fw_write_wait = false;
1107         adev->gfx.mec_fw_write_wait = false;
1108
1109         if ((adev->asic_type != CHIP_ARCTURUS) &&
1110             ((adev->gfx.mec_fw_version < 0x000001a5) ||
1111             (adev->gfx.mec_feature_version < 46) ||
1112             (adev->gfx.pfp_fw_version < 0x000000b7) ||
1113             (adev->gfx.pfp_feature_version < 46)))
1114                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1115
1116         switch (adev->asic_type) {
1117         case CHIP_VEGA10:
1118                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1119                     (adev->gfx.me_feature_version >= 42) &&
1120                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1121                     (adev->gfx.pfp_feature_version >= 42))
1122                         adev->gfx.me_fw_write_wait = true;
1123
1124                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1125                     (adev->gfx.mec_feature_version >= 42))
1126                         adev->gfx.mec_fw_write_wait = true;
1127                 break;
1128         case CHIP_VEGA12:
1129                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1130                     (adev->gfx.me_feature_version >= 44) &&
1131                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1132                     (adev->gfx.pfp_feature_version >= 44))
1133                         adev->gfx.me_fw_write_wait = true;
1134
1135                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1136                     (adev->gfx.mec_feature_version >= 44))
1137                         adev->gfx.mec_fw_write_wait = true;
1138                 break;
1139         case CHIP_VEGA20:
1140                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1141                     (adev->gfx.me_feature_version >= 44) &&
1142                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1143                     (adev->gfx.pfp_feature_version >= 44))
1144                         adev->gfx.me_fw_write_wait = true;
1145
1146                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1147                     (adev->gfx.mec_feature_version >= 44))
1148                         adev->gfx.mec_fw_write_wait = true;
1149                 break;
1150         case CHIP_RAVEN:
1151                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1152                     (adev->gfx.me_feature_version >= 42) &&
1153                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1154                     (adev->gfx.pfp_feature_version >= 42))
1155                         adev->gfx.me_fw_write_wait = true;
1156
1157                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1158                     (adev->gfx.mec_feature_version >= 42))
1159                         adev->gfx.mec_fw_write_wait = true;
1160                 break;
1161         default:
1162                 break;
1163         }
1164 }
1165
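/*
 * Devices that need GFXOFF disabled.  An entry matches on the exact PCI
 * vendor/device/subsystem IDs and revision; gfx_v9_0_should_disable_gfxoff()
 * walks this list and gfx_v9_0_check_if_need_gfxoff() clears PP_GFXOFF_MASK
 * for any match.
 */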
1166 struct amdgpu_gfxoff_quirk {
1167         u16 chip_vendor;
1168         u16 chip_device;
1169         u16 subsys_vendor;
1170         u16 subsys_device;
1171         u8 revision;
1172 };
1173
1174 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1175         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1176         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1177         { 0, 0, 0, 0, 0 },
1178 };
1179
1180 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1181 {
1182         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1183
1184         while (p && p->chip_device != 0) {
1185                 if (pdev->vendor == p->chip_vendor &&
1186                     pdev->device == p->chip_device &&
1187                     pdev->subsystem_vendor == p->subsys_vendor &&
1188                     pdev->subsystem_device == p->subsys_device &&
1189                     pdev->revision == p->revision) {
1190                         return true;
1191                 }
1192                 ++p;
1193         }
1194         return false;
1195 }
1196
1197 static bool is_raven_kicker(struct amdgpu_device *adev)
1198 {
1199         if (adev->pm.fw_version >= 0x41e2b)
1200                 return true;
1201         else
1202                 return false;
1203 }
1204
1205 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1206 {
1207         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1208                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1209
1210         switch (adev->asic_type) {
1211         case CHIP_VEGA10:
1212         case CHIP_VEGA12:
1213         case CHIP_VEGA20:
1214                 break;
1215         case CHIP_RAVEN:
1216                 if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) &&
1217                     ((!is_raven_kicker(adev) &&
1218                       adev->gfx.rlc_fw_version < 531) ||
1219                      (adev->gfx.rlc_feature_version < 1) ||
1220                      !adev->gfx.rlc.is_rlc_v2_1))
1221                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1222
1223                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1224                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1225                                 AMD_PG_SUPPORT_CP |
1226                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1227                 break;
1228         case CHIP_RENOIR:
1229                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1230                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1231                                 AMD_PG_SUPPORT_CP |
1232                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1233                 break;
1234         default:
1235                 break;
1236         }
1237 }
1238
1239 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1240                                           const char *chip_name)
1241 {
1242         char fw_name[30];
1243         int err;
1244         struct amdgpu_firmware_info *info = NULL;
1245         const struct common_firmware_header *header = NULL;
1246         const struct gfx_firmware_header_v1_0 *cp_hdr;
1247
1248         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1249         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1250         if (err)
1251                 goto out;
1252         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1253         if (err)
1254                 goto out;
1255         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1256         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1257         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1258
1259         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1260         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1261         if (err)
1262                 goto out;
1263         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1264         if (err)
1265                 goto out;
1266         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1267         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1268         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1269
1270         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1271         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1272         if (err)
1273                 goto out;
1274         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1275         if (err)
1276                 goto out;
1277         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1278         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1279         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1280
1281         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1282                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1283                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1284                 info->fw = adev->gfx.pfp_fw;
1285                 header = (const struct common_firmware_header *)info->fw->data;
1286                 adev->firmware.fw_size +=
1287                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1288
1289                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1290                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1291                 info->fw = adev->gfx.me_fw;
1292                 header = (const struct common_firmware_header *)info->fw->data;
1293                 adev->firmware.fw_size +=
1294                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1295
1296                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1297                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1298                 info->fw = adev->gfx.ce_fw;
1299                 header = (const struct common_firmware_header *)info->fw->data;
1300                 adev->firmware.fw_size +=
1301                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1302         }
1303
1304 out:
1305         if (err) {
1306                 dev_err(adev->dev,
1307                         "gfx9: Failed to load firmware \"%s\"\n",
1308                         fw_name);
1309                 release_firmware(adev->gfx.pfp_fw);
1310                 adev->gfx.pfp_fw = NULL;
1311                 release_firmware(adev->gfx.me_fw);
1312                 adev->gfx.me_fw = NULL;
1313                 release_firmware(adev->gfx.ce_fw);
1314                 adev->gfx.ce_fw = NULL;
1315         }
1316         return err;
1317 }
1318
1319 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1320                                           const char *chip_name)
1321 {
1322         char fw_name[30];
1323         int err;
1324         struct amdgpu_firmware_info *info = NULL;
1325         const struct common_firmware_header *header = NULL;
1326         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1327         unsigned int *tmp = NULL;
1328         unsigned int i = 0;
1329         uint16_t version_major;
1330         uint16_t version_minor;
1331         uint32_t smu_version;
1332
1333         /*
1334          * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1335          * instead of picasso_rlc.bin.
1336          * Detection:
1337          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1338          *          or revision >= 0xD8 && revision <= 0xDF
1339          * otherwise it is PCO FP5
1340          */
1341         if (!strcmp(chip_name, "picasso") &&
1342                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1343                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1344                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1345         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1346                 (smu_version >= 0x41e2b))
1347                 /* SMC is loaded by the SBIOS on APUs, so the SMU version
1348                  * can be queried directly.
1349                  */
1350                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1351         else
1352                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1353         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1354         if (err)
1355                 goto out;
1356         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
             if (err)
                     goto out;
1357         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1358
1359         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1360         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1361         if (version_major == 2 && version_minor == 1)
1362                 adev->gfx.rlc.is_rlc_v2_1 = true;
1363
1364         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1365         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1366         adev->gfx.rlc.save_and_restore_offset =
1367                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1368         adev->gfx.rlc.clear_state_descriptor_offset =
1369                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1370         adev->gfx.rlc.avail_scratch_ram_locations =
1371                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1372         adev->gfx.rlc.reg_restore_list_size =
1373                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1374         adev->gfx.rlc.reg_list_format_start =
1375                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1376         adev->gfx.rlc.reg_list_format_separate_start =
1377                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1378         adev->gfx.rlc.starting_offsets_start =
1379                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1380         adev->gfx.rlc.reg_list_format_size_bytes =
1381                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1382         adev->gfx.rlc.reg_list_size_bytes =
1383                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1384         adev->gfx.rlc.register_list_format =
1385                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1386                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1387         if (!adev->gfx.rlc.register_list_format) {
1388                 err = -ENOMEM;
1389                 goto out;
1390         }
1391
1392         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1393                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1394         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1395                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1396
1397         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1398
1399         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1400                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1401         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1402                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1403
1404         if (adev->gfx.rlc.is_rlc_v2_1)
1405                 gfx_v9_0_init_rlc_ext_microcode(adev);
1406
1407         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1408                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1409                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1410                 info->fw = adev->gfx.rlc_fw;
1411                 header = (const struct common_firmware_header *)info->fw->data;
1412                 adev->firmware.fw_size +=
1413                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1414
1415                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1416                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1417                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1418                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1419                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1420                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1421                         info->fw = adev->gfx.rlc_fw;
1422                         adev->firmware.fw_size +=
1423                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1424
1425                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1426                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1427                         info->fw = adev->gfx.rlc_fw;
1428                         adev->firmware.fw_size +=
1429                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1430
1431                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1432                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1433                         info->fw = adev->gfx.rlc_fw;
1434                         adev->firmware.fw_size +=
1435                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1436                 }
1437         }
1438
1439 out:
1440         if (err) {
1441                 dev_err(adev->dev,
1442                         "gfx9: Failed to load firmware \"%s\"\n",
1443                         fw_name);
1444                 release_firmware(adev->gfx.rlc_fw);
1445                 adev->gfx.rlc_fw = NULL;
1446         }
1447         return err;
1448 }
1449
1450 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1451                                           const char *chip_name)
1452 {
1453         char fw_name[30];
1454         int err;
1455         struct amdgpu_firmware_info *info = NULL;
1456         const struct common_firmware_header *header = NULL;
1457         const struct gfx_firmware_header_v1_0 *cp_hdr;
1458
1459         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1460         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1461         if (err)
1462                 goto out;
1463         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1464         if (err)
1465                 goto out;
1466         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1467         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1468         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1469
1470
1471         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1472         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1473         if (!err) {
1474                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1475                 if (err)
1476                         goto out;
1477                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1478                         adev->gfx.mec2_fw->data;
1479                 adev->gfx.mec2_fw_version =
1480                         le32_to_cpu(cp_hdr->header.ucode_version);
1481                 adev->gfx.mec2_feature_version =
1482                         le32_to_cpu(cp_hdr->ucode_feature_version);
1483         } else {
1484                 err = 0;
1485                 adev->gfx.mec2_fw = NULL;
1486         }
1487
1488         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1489                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1490                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1491                 info->fw = adev->gfx.mec_fw;
1492                 header = (const struct common_firmware_header *)info->fw->data;
1493                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1494                 adev->firmware.fw_size +=
1495                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1496
1497                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1498                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1499                 info->fw = adev->gfx.mec_fw;
1500                 adev->firmware.fw_size +=
1501                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1502
1503                 if (adev->gfx.mec2_fw) {
1504                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1505                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1506                         info->fw = adev->gfx.mec2_fw;
1507                         header = (const struct common_firmware_header *)info->fw->data;
1508                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1509                         adev->firmware.fw_size +=
1510                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1511
1512                         /* TODO: Determine if MEC2 JT FW loading can be
1513                          * removed for all GFX v9 ASICs and newer. */
1514                         if (adev->asic_type != CHIP_ARCTURUS &&
1515                             adev->asic_type != CHIP_RENOIR) {
1516                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1517                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1518                                 info->fw = adev->gfx.mec2_fw;
1519                                 adev->firmware.fw_size +=
1520                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1521                                         PAGE_SIZE);
1522                         }
1523                 }
1524         }
1525
1526 out:
1527         gfx_v9_0_check_if_need_gfxoff(adev);
1528         gfx_v9_0_check_fw_write_wait(adev);
1529         if (err) {
1530                 dev_err(adev->dev,
1531                         "gfx9: Failed to load firmware \"%s\"\n",
1532                         fw_name);
1533                 release_firmware(adev->gfx.mec_fw);
1534                 adev->gfx.mec_fw = NULL;
1535                 release_firmware(adev->gfx.mec2_fw);
1536                 adev->gfx.mec2_fw = NULL;
1537         }
1538         return err;
1539 }
1540
1541 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1542 {
1543         const char *chip_name;
1544         int r;
1545
1546         DRM_DEBUG("\n");
1547
1548         switch (adev->asic_type) {
1549         case CHIP_VEGA10:
1550                 chip_name = "vega10";
1551                 break;
1552         case CHIP_VEGA12:
1553                 chip_name = "vega12";
1554                 break;
1555         case CHIP_VEGA20:
1556                 chip_name = "vega20";
1557                 break;
1558         case CHIP_RAVEN:
1559                 if (adev->rev_id >= 8)
1560                         chip_name = "raven2";
1561                 else if (adev->pdev->device == 0x15d8)
1562                         chip_name = "picasso";
1563                 else
1564                         chip_name = "raven";
1565                 break;
1566         case CHIP_ARCTURUS:
1567                 chip_name = "arcturus";
1568                 break;
1569         case CHIP_RENOIR:
1570                 chip_name = "renoir";
1571                 break;
1572         default:
1573                 BUG();
1574         }
1575
1576         /* No CPG in Arcturus */
1577         if (adev->asic_type != CHIP_ARCTURUS) {
1578                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1579                 if (r)
1580                         return r;
1581         }
1582
1583         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1584         if (r)
1585                 return r;
1586
1587         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1588         if (r)
1589                 return r;
1590
1591         return r;
1592 }
1593
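/*
 * Size, in dwords, of the clear-state buffer built by
 * gfx_v9_0_get_csb_buffer(): 2 dwords for the PREAMBLE begin, 3 for
 * CONTEXT_CONTROL, (2 + reg_count) for each SECT_CONTEXT extent in
 * gfx9_cs_data, 2 for the PREAMBLE end and 2 for the trailing CLEAR_STATE
 * packet.
 */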
1594 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1595 {
1596         u32 count = 0;
1597         const struct cs_section_def *sect = NULL;
1598         const struct cs_extent_def *ext = NULL;
1599
1600         /* begin clear state */
1601         count += 2;
1602         /* context control state */
1603         count += 3;
1604
1605         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1606                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1607                         if (sect->id == SECT_CONTEXT)
1608                                 count += 2 + ext->reg_count;
1609                         else
1610                                 return 0;
1611                 }
1612         }
1613
1614         /* end clear state */
1615         count += 2;
1616         /* clear state */
1617         count += 2;
1618
1619         return count;
1620 }
1621
1622 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1623                                     volatile u32 *buffer)
1624 {
1625         u32 count = 0, i;
1626         const struct cs_section_def *sect = NULL;
1627         const struct cs_extent_def *ext = NULL;
1628
1629         if (adev->gfx.rlc.cs_data == NULL)
1630                 return;
1631         if (buffer == NULL)
1632                 return;
1633
1634         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1635         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1636
1637         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1638         buffer[count++] = cpu_to_le32(0x80000000);
1639         buffer[count++] = cpu_to_le32(0x80000000);
1640
1641         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1642                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1643                         if (sect->id == SECT_CONTEXT) {
1644                                 buffer[count++] =
1645                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1646                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1647                                                 PACKET3_SET_CONTEXT_REG_START);
1648                                 for (i = 0; i < ext->reg_count; i++)
1649                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1650                         } else {
1651                                 return;
1652                         }
1653                 }
1654         }
1655
1656         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1657         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1658
1659         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1660         buffer[count++] = cpu_to_le32(0);
1661 }
1662
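/*
 * Program the always-on CU masks used by RLC load balancing.  For every
 * SE/SH, the first always_on_cu_num active CUs (4 on APUs, 8 on Vega12,
 * 12 otherwise) are collected into a bitmap that is written to
 * mmRLC_LB_ALWAYS_ACTIVE_CU_MASK and cached in cu_info->ao_cu_bitmap; the
 * partial bitmap covering the first pg_always_on_cu_num (2) CUs is also
 * written to mmRLC_PG_ALWAYS_ON_CU_MASK.
 */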
1663 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1664 {
1665         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1666         uint32_t pg_always_on_cu_num = 2;
1667         uint32_t always_on_cu_num;
1668         uint32_t i, j, k;
1669         uint32_t mask, cu_bitmap, counter;
1670
1671         if (adev->flags & AMD_IS_APU)
1672                 always_on_cu_num = 4;
1673         else if (adev->asic_type == CHIP_VEGA12)
1674                 always_on_cu_num = 8;
1675         else
1676                 always_on_cu_num = 12;
1677
1678         mutex_lock(&adev->grbm_idx_mutex);
1679         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1680                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1681                         mask = 1;
1682                         cu_bitmap = 0;
1683                         counter = 0;
1684                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1685
1686                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1687                                 if (cu_info->bitmap[i][j] & mask) {
1688                                         if (counter == pg_always_on_cu_num)
1689                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1690                                         if (counter < always_on_cu_num)
1691                                                 cu_bitmap |= mask;
1692                                         else
1693                                                 break;
1694                                         counter++;
1695                                 }
1696                                 mask <<= 1;
1697                         }
1698
1699                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1700                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1701                 }
1702         }
1703         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1704         mutex_unlock(&adev->grbm_idx_mutex);
1705 }
1706
1707 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1708 {
1709         uint32_t data;
1710
1711         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1712         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1713         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1714         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1715         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1716
1717         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1718         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1719
1720         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1721         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1722
1723         mutex_lock(&adev->grbm_idx_mutex);
1724         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1725         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1726         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1727
1728         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1729         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1730         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1731         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1732         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1733
1734         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1735         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1736         data &= 0x0000FFFF;
1737         data |= 0x00C00000;
1738         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1739
1740         /*
1741          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1742          * programmed in gfx_v9_0_init_always_on_cu_mask()
1743          */
1744
1745         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1746          * but used for RLC_LB_CNTL configuration */
1747         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1748         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1749         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1750         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1751         mutex_unlock(&adev->grbm_idx_mutex);
1752
1753         gfx_v9_0_init_always_on_cu_mask(adev);
1754 }
1755
1756 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1757 {
1758         uint32_t data;
1759
1760         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1761         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1762         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1763         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1764         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1765
1766         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1767         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1768
1769         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1770         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1771
1772         mutex_lock(&adev->grbm_idx_mutex);
1773         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1774         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1775         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1776
1777         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1778         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1779         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1780         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1781         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1782
1783         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1784         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1785         data &= 0x0000FFFF;
1786         data |= 0x00C00000;
1787         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1788
1789         /*
1790          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1791          * programmed in gfx_v9_0_init_always_on_cu_mask()
1792          */
1793
1794         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1795          * but used for RLC_LB_CNTL configuration */
1796         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1797         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1798         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1799         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1800         mutex_unlock(&adev->grbm_idx_mutex);
1801
1802         gfx_v9_0_init_always_on_cu_mask(adev);
1803 }
1804
1805 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1806 {
1807         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1808 }
1809
1810 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1811 {
1812         return 5;
1813 }
1814
1815 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1816 {
1817         const struct cs_section_def *cs_data;
1818         int r;
1819
1820         adev->gfx.rlc.cs_data = gfx9_cs_data;
1821
1822         cs_data = adev->gfx.rlc.cs_data;
1823
1824         if (cs_data) {
1825                 /* init clear state block */
1826                 r = amdgpu_gfx_rlc_init_csb(adev);
1827                 if (r)
1828                         return r;
1829         }
1830
1831         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1832                 /* TODO: double check the cp_table_size for RV */
1833                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1834                 r = amdgpu_gfx_rlc_init_cpt(adev);
1835                 if (r)
1836                         return r;
1837         }
1838
1839         switch (adev->asic_type) {
1840         case CHIP_RAVEN:
1841                 gfx_v9_0_init_lbpw(adev);
1842                 break;
1843         case CHIP_VEGA20:
1844                 gfx_v9_4_init_lbpw(adev);
1845                 break;
1846         default:
1847                 break;
1848         }
1849
1850         /* init spm vmid with 0xf */
1851         if (adev->gfx.rlc.funcs->update_spm_vmid)
1852                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1853
1854         return 0;
1855 }
1856
1857 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1858 {
1859         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1860         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1861 }
1862
1863 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1864 {
1865         int r;
1866         u32 *hpd;
1867         const __le32 *fw_data;
1868         unsigned fw_size;
1869         u32 *fw;
1870         size_t mec_hpd_size;
1871
1872         const struct gfx_firmware_header_v1_0 *mec_hdr;
1873
1874         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1875
1876         /* take ownership of the relevant compute queues */
1877         amdgpu_gfx_compute_queue_acquire(adev);
1878         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1879
1880         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1881                                       AMDGPU_GEM_DOMAIN_VRAM,
1882                                       &adev->gfx.mec.hpd_eop_obj,
1883                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1884                                       (void **)&hpd);
1885         if (r) {
1886                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1887                 gfx_v9_0_mec_fini(adev);
1888                 return r;
1889         }
1890
1891         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1892
1893         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1894         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1895
1896         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1897
1898         fw_data = (const __le32 *)
1899                 (adev->gfx.mec_fw->data +
1900                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1901         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1902
1903         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1904                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1905                                       &adev->gfx.mec.mec_fw_obj,
1906                                       &adev->gfx.mec.mec_fw_gpu_addr,
1907                                       (void **)&fw);
1908         if (r) {
1909                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1910                 gfx_v9_0_mec_fini(adev);
1911                 return r;
1912         }
1913
1914         memcpy(fw, fw_data, fw_size);
1915
1916         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1917         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1918
1919         return 0;
1920 }
1921
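/*
 * Wave debug helpers: SQ state is read through the SQ_IND_INDEX /
 * SQ_IND_DATA indirect register pair.  wave_read_ind() fetches a single
 * per-wave register, while wave_read_regs() sets AUTO_INCR to stream @num
 * consecutive registers (used below for the SGPR/VGPR dumps).
 */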
1922 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1923 {
1924         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1925                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1926                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1927                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1928                 (SQ_IND_INDEX__FORCE_READ_MASK));
1929         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1930 }
1931
1932 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1933                            uint32_t wave, uint32_t thread,
1934                            uint32_t regno, uint32_t num, uint32_t *out)
1935 {
1936         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1937                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1938                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1939                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1940                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1941                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1942                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1943         while (num--)
1944                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1945 }
1946
1947 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1948 {
1949         /* type 1 wave data */
1950         dst[(*no_fields)++] = 1;
1951         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1952         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1953         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1954         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1955         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1956         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1957         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1958         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1959         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1960         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1961         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1962         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1963         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1964         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1965 }
1966
1967 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1968                                      uint32_t wave, uint32_t start,
1969                                      uint32_t size, uint32_t *dst)
1970 {
1971         wave_read_regs(
1972                 adev, simd, wave, 0,
1973                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1974 }
1975
1976 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1977                                      uint32_t wave, uint32_t thread,
1978                                      uint32_t start, uint32_t size,
1979                                      uint32_t *dst)
1980 {
1981         wave_read_regs(
1982                 adev, simd, wave, thread,
1983                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1984 }
1985
1986 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1987                                   u32 me, u32 pipe, u32 q, u32 vm)
1988 {
1989         soc15_grbm_select(adev, me, pipe, q, vm);
1990 }
1991
1992 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1993         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1994         .select_se_sh = &gfx_v9_0_select_se_sh,
1995         .read_wave_data = &gfx_v9_0_read_wave_data,
1996         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1997         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1998         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1999         .ras_error_inject = &gfx_v9_0_ras_error_inject,
2000         .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2001         .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2002 };
2003
2004 static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
2005         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2006         .select_se_sh = &gfx_v9_0_select_se_sh,
2007         .read_wave_data = &gfx_v9_0_read_wave_data,
2008         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2009         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2010         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2011         .ras_error_inject = &gfx_v9_4_ras_error_inject,
2012         .query_ras_error_count = &gfx_v9_4_query_ras_error_count,
2013         .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
2014 };
2015
2016 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2017 {
2018         u32 gb_addr_config;
2019         int err;
2020
2021         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2022
2023         switch (adev->asic_type) {
2024         case CHIP_VEGA10:
2025                 adev->gfx.config.max_hw_contexts = 8;
2026                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2027                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2028                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2029                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2030                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2031                 break;
2032         case CHIP_VEGA12:
2033                 adev->gfx.config.max_hw_contexts = 8;
2034                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2035                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2036                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2037                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2038                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2039                 DRM_INFO("fix gfx.config for vega12\n");
2040                 break;
2041         case CHIP_VEGA20:
2042                 adev->gfx.config.max_hw_contexts = 8;
2043                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2044                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2045                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2046                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2047                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2048                 gb_addr_config &= ~0xf3e777ff;
2049                 gb_addr_config |= 0x22014042;
2050                 /* check vbios table if gpu info is not available */
2051                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2052                 if (err)
2053                         return err;
2054                 break;
2055         case CHIP_RAVEN:
2056                 adev->gfx.config.max_hw_contexts = 8;
2057                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2058                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2059                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2060                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2061                 if (adev->rev_id >= 8)
2062                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2063                 else
2064                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2065                 break;
2066         case CHIP_ARCTURUS:
2067                 adev->gfx.funcs = &gfx_v9_4_gfx_funcs;
2068                 adev->gfx.config.max_hw_contexts = 8;
2069                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2070                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2071                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2072                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2073                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2074                 gb_addr_config &= ~0xf3e777ff;
2075                 gb_addr_config |= 0x22014042;
2076                 break;
2077         case CHIP_RENOIR:
2078                 adev->gfx.config.max_hw_contexts = 8;
2079                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2080                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2081                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2082                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2083                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2084                 gb_addr_config &= ~0xf3e777ff;
2085                 gb_addr_config |= 0x22010042;
2086                 break;
2087         default:
2088                 BUG();
2089                 break;
2090         }
2091
2092         adev->gfx.config.gb_addr_config = gb_addr_config;
2093
2094         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2095                         REG_GET_FIELD(
2096                                         adev->gfx.config.gb_addr_config,
2097                                         GB_ADDR_CONFIG,
2098                                         NUM_PIPES);
2099
2100         adev->gfx.config.max_tile_pipes =
2101                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2102
2103         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2104                         REG_GET_FIELD(
2105                                         adev->gfx.config.gb_addr_config,
2106                                         GB_ADDR_CONFIG,
2107                                         NUM_BANKS);
2108         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2109                         REG_GET_FIELD(
2110                                         adev->gfx.config.gb_addr_config,
2111                                         GB_ADDR_CONFIG,
2112                                         MAX_COMPRESSED_FRAGS);
2113         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2114                         REG_GET_FIELD(
2115                                         adev->gfx.config.gb_addr_config,
2116                                         GB_ADDR_CONFIG,
2117                                         NUM_RB_PER_SE);
2118         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2119                         REG_GET_FIELD(
2120                                         adev->gfx.config.gb_addr_config,
2121                                         GB_ADDR_CONFIG,
2122                                         NUM_SHADER_ENGINES);
2123         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2124                         REG_GET_FIELD(
2125                                         adev->gfx.config.gb_addr_config,
2126                                         GB_ADDR_CONFIG,
2127                                         PIPE_INTERLEAVE_SIZE));
2128
2129         return 0;
2130 }
2131
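/*
 * Set up one compute ring: MEC0 shows up as ME1 to the CP, the doorbell
 * index is derived from the first MEC ring doorbell plus the ring id, the
 * EOP buffer is a GFX9_MEC_HPD_SIZE slice of the shared HPD BO, and the
 * EOP interrupt source is chosen from the ring's MEC/pipe pair.
 */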
2132 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2133                                       int mec, int pipe, int queue)
2134 {
2135         int r;
2136         unsigned irq_type;
2137         struct amdgpu_ring *ring;
2138
2139         ring = &adev->gfx.compute_ring[ring_id];
2140
2141         /* mec0 is me1 */
2142         ring->me = mec + 1;
2143         ring->pipe = pipe;
2144         ring->queue = queue;
2145
2146         ring->ring_obj = NULL;
2147         ring->use_doorbell = true;
2148         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2149         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2150                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2151         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2152
2153         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2154                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2155                 + ring->pipe;
2156
2157         /* type-2 packets are deprecated on MEC, use type-3 instead */
2158         r = amdgpu_ring_init(adev, ring, 1024,
2159                              &adev->gfx.eop_irq, irq_type);
2160         if (r)
2161                 return r;
2162
2163
2164         return 0;
2165 }
2166
2167 static int gfx_v9_0_sw_init(void *handle)
2168 {
2169         int i, j, k, r, ring_id;
2170         struct amdgpu_ring *ring;
2171         struct amdgpu_kiq *kiq;
2172         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2173
2174         switch (adev->asic_type) {
2175         case CHIP_VEGA10:
2176         case CHIP_VEGA12:
2177         case CHIP_VEGA20:
2178         case CHIP_RAVEN:
2179         case CHIP_ARCTURUS:
2180         case CHIP_RENOIR:
2181                 adev->gfx.mec.num_mec = 2;
2182                 break;
2183         default:
2184                 adev->gfx.mec.num_mec = 1;
2185                 break;
2186         }
2187
2188         adev->gfx.mec.num_pipe_per_mec = 4;
2189         adev->gfx.mec.num_queue_per_pipe = 8;
2190
2191         /* EOP Event */
2192         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2193         if (r)
2194                 return r;
2195
2196         /* Privileged reg */
2197         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2198                               &adev->gfx.priv_reg_irq);
2199         if (r)
2200                 return r;
2201
2202         /* Privileged inst */
2203         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2204                               &adev->gfx.priv_inst_irq);
2205         if (r)
2206                 return r;
2207
2208         /* ECC error */
2209         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2210                               &adev->gfx.cp_ecc_error_irq);
2211         if (r)
2212                 return r;
2213
2214         /* FUE error */
2215         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2216                               &adev->gfx.cp_ecc_error_irq);
2217         if (r)
2218                 return r;
2219
2220         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2221
2222         gfx_v9_0_scratch_init(adev);
2223
2224         r = gfx_v9_0_init_microcode(adev);
2225         if (r) {
2226                 DRM_ERROR("Failed to load gfx firmware!\n");
2227                 return r;
2228         }
2229
2230         r = adev->gfx.rlc.funcs->init(adev);
2231         if (r) {
2232                 DRM_ERROR("Failed to init rlc BOs!\n");
2233                 return r;
2234         }
2235
2236         r = gfx_v9_0_mec_init(adev);
2237         if (r) {
2238                 DRM_ERROR("Failed to init MEC BOs!\n");
2239                 return r;
2240         }
2241
2242         /* set up the gfx ring */
2243         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2244                 ring = &adev->gfx.gfx_ring[i];
2245                 ring->ring_obj = NULL;
2246                 if (!i)
2247                         sprintf(ring->name, "gfx");
2248                 else
2249                         sprintf(ring->name, "gfx_%d", i);
2250                 ring->use_doorbell = true;
2251                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2252                 r = amdgpu_ring_init(adev, ring, 1024,
2253                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2254                 if (r)
2255                         return r;
2256         }
2257
2258         /* set up the compute queues - allocate horizontally across pipes */
2259         ring_id = 0;
2260         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2261                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2262                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2263                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2264                                         continue;
2265
2266                                 r = gfx_v9_0_compute_ring_init(adev,
2267                                                                ring_id,
2268                                                                i, k, j);
2269                                 if (r)
2270                                         return r;
2271
2272                                 ring_id++;
2273                         }
2274                 }
2275         }
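        /*
         * The loop order above (mec -> queue -> pipe) is what "allocate
         * horizontally across pipes" means: with 4 pipes per MEC, ring 0
         * lands on mec0/pipe0/queue0, ring 1 on mec0/pipe1/queue0, and
         * ring 4 wraps around to mec0/pipe0/queue1, skipping any queue that
         * amdgpu_gfx_is_mec_queue_enabled() filters out.
         */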
2276
2277         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2278         if (r) {
2279                 DRM_ERROR("Failed to init KIQ BOs!\n");
2280                 return r;
2281         }
2282
2283         kiq = &adev->gfx.kiq;
2284         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2285         if (r)
2286                 return r;
2287
2288         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2289         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2290         if (r)
2291                 return r;
2292
2293         adev->gfx.ce_ram_size = 0x8000;
2294
2295         r = gfx_v9_0_gpu_early_init(adev);
2296         if (r)
2297                 return r;
2298
2299         return 0;
2300 }
2301
2302
2303 static int gfx_v9_0_sw_fini(void *handle)
2304 {
2305         int i;
2306         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2307
2308         amdgpu_gfx_ras_fini(adev);
2309
2310         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2311                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2312         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2313                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2314
2315         amdgpu_gfx_mqd_sw_fini(adev);
2316         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2317         amdgpu_gfx_kiq_fini(adev);
2318
2319         gfx_v9_0_mec_fini(adev);
2320         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2321         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2322                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2323                                 &adev->gfx.rlc.cp_table_gpu_addr,
2324                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2325         }
2326         gfx_v9_0_free_microcode(adev);
2327
2328         return 0;
2329 }
2330
2331
2332 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2333 {
2334         /* TODO */
2335 }
2336
2337 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2338 {
2339         u32 data;
2340
2341         if (instance == 0xffffffff)
2342                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2343         else
2344                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2345
2346         if (se_num == 0xffffffff)
2347                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2348         else
2349                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2350
2351         if (sh_num == 0xffffffff)
2352                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2353         else
2354                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2355
2356         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2357 }
2358
2359 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2360 {
2361         u32 data, mask;
2362
2363         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2364         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2365
2366         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2367         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2368
2369         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2370                                          adev->gfx.config.max_sh_per_se);
2371
2372         return (~data) & mask;
2373 }
2374
2375 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2376 {
2377         int i, j;
2378         u32 data;
2379         u32 active_rbs = 0;
2380         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2381                                         adev->gfx.config.max_sh_per_se;
2382
2383         mutex_lock(&adev->grbm_idx_mutex);
2384         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2385                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2386                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2387                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2388                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2389                                                rb_bitmap_width_per_sh);
2390                 }
2391         }
2392         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2393         mutex_unlock(&adev->grbm_idx_mutex);
2394
2395         adev->gfx.config.backend_enable_mask = active_rbs;
2396         adev->gfx.config.num_rbs = hweight32(active_rbs);
2397 }
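/*
 * Illustration of the bitmap packing done in gfx_v9_0_setup_rb() (example
 * numbers only, not tied to a specific ASIC): with 4 shader engines, 1 SH
 * per SE and 4 backends per SE, rb_bitmap_width_per_sh is 4, each SE/SH
 * pair contributes a 4-bit slice, and a fully enabled part ends up with
 * active_rbs == 0xffff and num_rbs == 16.
 */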
2398
2399 #define DEFAULT_SH_MEM_BASES    (0x6000)
2400 #define FIRST_COMPUTE_VMID      (8)
2401 #define LAST_COMPUTE_VMID       (16)
2402 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2403 {
2404         int i;
2405         uint32_t sh_mem_config;
2406         uint32_t sh_mem_bases;
2407
2408         /*
2409          * Configure apertures:
2410          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2411          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2412          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2413          */
2414         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
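        /*
         * With DEFAULT_SH_MEM_BASES == 0x6000 this packs 0x6000 into both
         * 16-bit base fields of SH_MEM_BASES, i.e. both apertures start at
         * 0x6000'0000'00000000 (the fields hold bits [63:48] of the base),
         * matching the layout described in the comment above.
         */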
2415
2416         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2417                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2418                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2419
2420         mutex_lock(&adev->srbm_mutex);
2421         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2422                 soc15_grbm_select(adev, 0, 0, 0, i);
2423                 /* CP and shaders */
2424                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2425                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2426         }
2427         soc15_grbm_select(adev, 0, 0, 0, 0);
2428         mutex_unlock(&adev->srbm_mutex);
2429
2430         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2431            access. These should be enabled by FW for target VMIDs. */
2432         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2433                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2434                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2435                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2436                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2437         }
2438 }
2439
2440 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2441 {
2442         int vmid;
2443
2444         /*
2445          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2446          * access. Compute VMIDs should be enabled by FW for target VMIDs;
2447          * the driver can enable them for graphics. VMID0 should maintain
2448          * access so that HWS firmware can save/restore entries.
2449          */
2450         for (vmid = 1; vmid < 16; vmid++) {
2451                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2452                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2453                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2454                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2455         }
2456 }
2457
2458 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2459 {
2460         uint32_t tmp;
2461
2462         switch (adev->asic_type) {
2463         case CHIP_ARCTURUS:
2464                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2465                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2466                                         DISABLE_BARRIER_WAITCNT, 1);
2467                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2468                 break;
2469         default:
2470                 break;
2471         }
2472 }
2473
2474 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2475 {
2476         u32 tmp;
2477         int i;
2478
2479         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2480
2481         gfx_v9_0_tiling_mode_table_init(adev);
2482
2483         gfx_v9_0_setup_rb(adev);
2484         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2485         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2486
2487         /* XXX SH_MEM regs */
2488         /* where to put LDS, scratch, GPUVM in FSA64 space */
2489         mutex_lock(&adev->srbm_mutex);
2490         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2491                 soc15_grbm_select(adev, 0, 0, 0, i);
2492                 /* CP and shaders */
2493                 if (i == 0) {
2494                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2495                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2496                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2497                                             !!amdgpu_noretry);
2498                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2499                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2500                 } else {
2501                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2502                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2503                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2504                                             !!amdgpu_noretry);
2505                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2506                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2507                                 (adev->gmc.private_aperture_start >> 48));
2508                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2509                                 (adev->gmc.shared_aperture_start >> 48));
2510                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2511                 }
2512         }
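        /*
         * The ">> 48" above works because the SH_MEM_BASES fields hold bits
         * [63:48] of the aperture base; e.g. a (hypothetical) shared
         * aperture at 0x1000'0000'00000000 would be programmed as 0x1000.
         */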
2513         soc15_grbm_select(adev, 0, 0, 0, 0);
2514
2515         mutex_unlock(&adev->srbm_mutex);
2516
2517         gfx_v9_0_init_compute_vmid(adev);
2518         gfx_v9_0_init_gds_vmid(adev);
2519         gfx_v9_0_init_sq_config(adev);
2520 }
2521
2522 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2523 {
2524         u32 i, j, k;
2525         u32 mask;
2526
2527         mutex_lock(&adev->grbm_idx_mutex);
2528         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2529                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2530                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2531                         for (k = 0; k < adev->usec_timeout; k++) {
2532                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2533                                         break;
2534                                 udelay(1);
2535                         }
2536                         if (k == adev->usec_timeout) {
2537                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2538                                                       0xffffffff, 0xffffffff);
2539                                 mutex_unlock(&adev->grbm_idx_mutex);
2540                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2541                                          i, j);
2542                                 return;
2543                         }
2544                 }
2545         }
2546         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2547         mutex_unlock(&adev->grbm_idx_mutex);
2548
2549         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2550                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2551                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2552                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2553         for (k = 0; k < adev->usec_timeout; k++) {
2554                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2555                         break;
2556                 udelay(1);
2557         }
2558 }
2559
2560 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2561                                                bool enable)
2562 {
2563         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2564
2565         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2566         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2567         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2568         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2569
2570         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2571 }
2572
2573 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2574 {
2575         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2576         /* csib */
2577         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2578                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2579         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2580                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2581         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2582                         adev->gfx.rlc.clear_state_size);
2583 }
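/*
 * gfx_v9_0_init_csb() hands the RLC the location of the clear-state
 * indirect buffer (CSIB): the high and low halves of its GPU address (the
 * low word is kept 4-byte aligned by the 0xfffffffc mask) plus its size in
 * the length register.
 */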
2584
2585 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2586                                 int indirect_offset,
2587                                 int list_size,
2588                                 int *unique_indirect_regs,
2589                                 int unique_indirect_reg_count,
2590                                 int *indirect_start_offsets,
2591                                 int *indirect_start_offsets_count,
2592                                 int max_start_offsets_count)
2593 {
2594         int idx;
2595
2596         for (; indirect_offset < list_size; indirect_offset++) {
2597                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2598                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2599                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2600
2601                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2602                         indirect_offset += 2;
2603
2604                         /* look for the matching index */
2605                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2606                                 if (unique_indirect_regs[idx] ==
2607                                         register_list_format[indirect_offset] ||
2608                                         !unique_indirect_regs[idx])
2609                                         break;
2610                         }
2611
2612                         BUG_ON(idx >= unique_indirect_reg_count);
2613
2614                         if (!unique_indirect_regs[idx])
2615                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2616
2617                         indirect_offset++;
2618                 }
2619         }
2620 }
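/*
 * Layout of register_list_format as the parser above reads it (inferred
 * from the loop structure, not from a format specification): after the
 * direct-register portion, each indirect block is a run of three-dword
 * entries, the last dword of each being the indirect register offset that
 * is matched against unique_indirect_regs, terminated by 0xFFFFFFFF.
 */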
2621
2622 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2623 {
2624         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2625         int unique_indirect_reg_count = 0;
2626
2627         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2628         int indirect_start_offsets_count = 0;
2629
2630         int list_size = 0;
2631         int i = 0, j = 0;
2632         u32 tmp = 0;
2633
2634         u32 *register_list_format =
2635                 kmemdup(adev->gfx.rlc.register_list_format,
2636                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2637         if (!register_list_format)
2638                 return -ENOMEM;
2639
2640         /* setup unique_indirect_regs array and indirect_start_offsets array */
2641         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2642         gfx_v9_1_parse_ind_reg_list(register_list_format,
2643                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2644                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2645                                     unique_indirect_regs,
2646                                     unique_indirect_reg_count,
2647                                     indirect_start_offsets,
2648                                     &indirect_start_offsets_count,
2649                                     ARRAY_SIZE(indirect_start_offsets));
2650
2651         /* enable auto inc in case it is disabled */
2652         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2653         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2654         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2655
2656         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2657         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2658                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2659         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2660                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2661                         adev->gfx.rlc.register_restore[i]);
2662
2663         /* load indirect register */
2664         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2665                 adev->gfx.rlc.reg_list_format_start);
2666
2667         /* direct register portion */
2668         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2669                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2670                         register_list_format[i]);
2671
2672         /* indirect register portion */
2673         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2674                 if (register_list_format[i] == 0xFFFFFFFF) {
2675                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2676                         continue;
2677                 }
2678
2679                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2680                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2681
2682                 for (j = 0; j < unique_indirect_reg_count; j++) {
2683                         if (register_list_format[i] == unique_indirect_regs[j]) {
2684                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2685                                 break;
2686                         }
2687                 }
2688
2689                 BUG_ON(j >= unique_indirect_reg_count);
2690
2691                 i++;
2692         }
2693
2694         /* set save/restore list size */
2695         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2696         list_size = list_size >> 1;
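        /*
         * list_size is in dwords at this point; halving it suggests the
         * restore list is stored as offset/value pairs, so the RLC is told
         * the number of pairs (an inference from the shift above).
         */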
2697         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2698                 adev->gfx.rlc.reg_restore_list_size);
2699         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2700
2701         /* write the starting offsets to RLC scratch ram */
2702         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2703                 adev->gfx.rlc.starting_offsets_start);
2704         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2705                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2706                        indirect_start_offsets[i]);
2707
2708         /* load unique indirect regs */
2709         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2710                 if (unique_indirect_regs[i] != 0) {
2711                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2712                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2713                                unique_indirect_regs[i] & 0x3FFFF);
2714
2715                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2716                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2717                                unique_indirect_regs[i] >> 20);
2718                 }
2719         }
2720
2721         kfree(register_list_format);
2722         return 0;
2723 }
2724
2725 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2726 {
2727         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2728 }
2729
2730 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2731                                              bool enable)
2732 {
2733         uint32_t data = 0;
2734         uint32_t default_data = 0;
2735
2736         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2737         if (enable) {
2738                 /* enable GFXIP control over CGPG */
2739                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2740                 if (default_data != data)
2741                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2742
2743                 /* update status */
2744                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2745                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2746                 if (default_data != data)
2747                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2748         } else {
2749                 /* restore GFXIP control over CGPG */
2750                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2751                 if (default_data != data)
2752                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2753         }
2754 }
2755
2756 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2757 {
2758         uint32_t data = 0;
2759
2760         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2761                               AMD_PG_SUPPORT_GFX_SMG |
2762                               AMD_PG_SUPPORT_GFX_DMG)) {
2763                 /* init IDLE_POLL_COUNT = 0x60 */
2764                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2765                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2766                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2767                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2768
2769                 /* init RLC PG Delay */
2770                 data = 0;
2771                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2772                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2773                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2774                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2775                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2776
2777                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2778                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2779                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2780                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2781
2782                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2783                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2784                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2785                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2786
2787                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2788                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2789
2790                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2791                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2792                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2793
2794                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2795         }
2796 }
2797
2798 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2799                                                 bool enable)
2800 {
2801         uint32_t data = 0;
2802         uint32_t default_data = 0;
2803
2804         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2805         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2806                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2807                              enable ? 1 : 0);
2808         if (default_data != data)
2809                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2810 }
2811
2812 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2813                                                 bool enable)
2814 {
2815         uint32_t data = 0;
2816         uint32_t default_data = 0;
2817
2818         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2819         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2820                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2821                              enable ? 1 : 0);
2822         if (default_data != data)
2823                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2824 }
2825
2826 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2827                                         bool enable)
2828 {
2829         uint32_t data = 0;
2830         uint32_t default_data = 0;
2831
2832         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2833         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2834                              CP_PG_DISABLE,
2835                              enable ? 0 : 1);
2836         if (default_data != data)
2837                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2838 }
2839
2840 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2841                                                 bool enable)
2842 {
2843         uint32_t data, default_data;
2844
2845         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2846         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2847                              GFX_POWER_GATING_ENABLE,
2848                              enable ? 1 : 0);
2849         if (default_data != data)
2850                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2851 }
2852
2853 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2854                                                 bool enable)
2855 {
2856         uint32_t data, default_data;
2857
2858         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2859         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2860                              GFX_PIPELINE_PG_ENABLE,
2861                              enable ? 1 : 0);
2862         if (default_data != data)
2863                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2864
2865         if (!enable)
2866                 /* read any GFX register to wake up GFX */
2867                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2868 }
2869
2870 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2871                                                        bool enable)
2872 {
2873         uint32_t data, default_data;
2874
2875         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2876         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2877                              STATIC_PER_CU_PG_ENABLE,
2878                              enable ? 1 : 0);
2879         if (default_data != data)
2880                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2881 }
2882
2883 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2884                                                 bool enable)
2885 {
2886         uint32_t data, default_data;
2887
2888         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2889         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2890                              DYN_PER_CU_PG_ENABLE,
2891                              enable ? 1 : 0);
2892         if (default_data != data)
2893                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2894 }
2895
2896 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2897 {
2898         gfx_v9_0_init_csb(adev);
2899
2900         /*
2901          * The RLC save/restore list is available since RLC v2_1
2902          * and is needed by the gfxoff feature.
2903          */
2904         if (adev->gfx.rlc.is_rlc_v2_1) {
2905                 if (adev->asic_type == CHIP_VEGA12 ||
2906                     (adev->asic_type == CHIP_RAVEN &&
2907                      adev->rev_id >= 8))
2908                         gfx_v9_1_init_rlc_save_restore_list(adev);
2909                 gfx_v9_0_enable_save_restore_machine(adev);
2910         }
2911
2912         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2913                               AMD_PG_SUPPORT_GFX_SMG |
2914                               AMD_PG_SUPPORT_GFX_DMG |
2915                               AMD_PG_SUPPORT_CP |
2916                               AMD_PG_SUPPORT_GDS |
2917                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2918                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2919                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2920                 gfx_v9_0_init_gfx_power_gating(adev);
2921         }
2922 }
2923
2924 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2925 {
2926         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2927         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2928         gfx_v9_0_wait_for_rlc_serdes(adev);
2929 }
2930
2931 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2932 {
2933         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2934         udelay(50);
2935         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2936         udelay(50);
2937 }
2938
2939 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2940 {
2941 #ifdef AMDGPU_RLC_DEBUG_RETRY
2942         u32 rlc_ucode_ver;
2943 #endif
2944
2945         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2946         udelay(50);
2947
2948         /* on APUs (e.g. Carrizo) the CP interrupt is enabled after the CP is initialized */
2949         if (!(adev->flags & AMD_IS_APU)) {
2950                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2951                 udelay(50);
2952         }
2953
2954 #ifdef AMDGPU_RLC_DEBUG_RETRY
2955         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2956         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2957         if (rlc_ucode_ver == 0x108) {
2958                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%x / fw_ver == %i\n",
2959                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2960                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2961                  * default is 0x9C4 to create a 100us interval */
2962                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2963                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2964                  * to disable the page fault retry interrupts, default is
2965                  * 0x100 (256) */
2966                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2967         }
2968 #endif
2969 }
2970
2971 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2972 {
2973         const struct rlc_firmware_header_v2_0 *hdr;
2974         const __le32 *fw_data;
2975         unsigned i, fw_size;
2976
2977         if (!adev->gfx.rlc_fw)
2978                 return -EINVAL;
2979
2980         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2981         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2982
2983         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2984                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2985         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2986
2987         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2988                         RLCG_UCODE_LOADING_START_ADDRESS);
2989         for (i = 0; i < fw_size; i++)
2990                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2991         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2992
2993         return 0;
2994 }
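/*
 * Note the load pattern used here and by the CP loaders below: program the
 * starting ucode address once, stream the image through the DATA register
 * (which appears to auto-increment the address, otherwise a single ADDR
 * write could not cover the whole image), then write the firmware version
 * into the ADDR register as a final marker.
 */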
2995
2996 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2997 {
2998         int r;
2999
3000         if (amdgpu_sriov_vf(adev)) {
3001                 gfx_v9_0_init_csb(adev);
3002                 return 0;
3003         }
3004
3005         adev->gfx.rlc.funcs->stop(adev);
3006
3007         /* disable CG */
3008         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3009
3010         gfx_v9_0_init_pg(adev);
3011
3012         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3013                 /* legacy rlc firmware loading */
3014                 r = gfx_v9_0_rlc_load_microcode(adev);
3015                 if (r)
3016                         return r;
3017         }
3018
3019         switch (adev->asic_type) {
3020         case CHIP_RAVEN:
3021                 if (amdgpu_lbpw == 0)
3022                         gfx_v9_0_enable_lbpw(adev, false);
3023                 else
3024                         gfx_v9_0_enable_lbpw(adev, true);
3025                 break;
3026         case CHIP_VEGA20:
3027                 if (amdgpu_lbpw > 0)
3028                         gfx_v9_0_enable_lbpw(adev, true);
3029                 else
3030                         gfx_v9_0_enable_lbpw(adev, false);
3031                 break;
3032         default:
3033                 break;
3034         }
3035
3036         adev->gfx.rlc.funcs->start(adev);
3037
3038         return 0;
3039 }
3040
3041 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3042 {
3043         int i;
3044         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3045
3046         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3047         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3048         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3049         if (!enable) {
3050                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3051                         adev->gfx.gfx_ring[i].sched.ready = false;
3052         }
3053         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3054         udelay(50);
3055 }
3056
3057 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3058 {
3059         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3060         const struct gfx_firmware_header_v1_0 *ce_hdr;
3061         const struct gfx_firmware_header_v1_0 *me_hdr;
3062         const __le32 *fw_data;
3063         unsigned i, fw_size;
3064
3065         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3066                 return -EINVAL;
3067
3068         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3069                 adev->gfx.pfp_fw->data;
3070         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3071                 adev->gfx.ce_fw->data;
3072         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3073                 adev->gfx.me_fw->data;
3074
3075         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3076         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3077         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3078
3079         gfx_v9_0_cp_gfx_enable(adev, false);
3080
3081         /* PFP */
3082         fw_data = (const __le32 *)
3083                 (adev->gfx.pfp_fw->data +
3084                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3085         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3086         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3087         for (i = 0; i < fw_size; i++)
3088                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3089         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3090
3091         /* CE */
3092         fw_data = (const __le32 *)
3093                 (adev->gfx.ce_fw->data +
3094                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3095         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3096         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3097         for (i = 0; i < fw_size; i++)
3098                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3099         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3100
3101         /* ME */
3102         fw_data = (const __le32 *)
3103                 (adev->gfx.me_fw->data +
3104                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3105         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3106         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3107         for (i = 0; i < fw_size; i++)
3108                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3109         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3110
3111         return 0;
3112 }
3113
3114 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3115 {
3116         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3117         const struct cs_section_def *sect = NULL;
3118         const struct cs_extent_def *ext = NULL;
3119         int r, i, tmp;
3120
3121         /* init the CP */
3122         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3123         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3124
3125         gfx_v9_0_cp_gfx_enable(adev, true);
3126
3127         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3128         if (r) {
3129                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3130                 return r;
3131         }
3132
3133         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3134         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3135
3136         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3137         amdgpu_ring_write(ring, 0x80000000);
3138         amdgpu_ring_write(ring, 0x80000000);
3139
3140         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3141                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3142                         if (sect->id == SECT_CONTEXT) {
3143                                 amdgpu_ring_write(ring,
3144                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3145                                                ext->reg_count));
3146                                 amdgpu_ring_write(ring,
3147                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3148                                 for (i = 0; i < ext->reg_count; i++)
3149                                         amdgpu_ring_write(ring, ext->extent[i]);
3150                         }
3151                 }
3152         }
3153
3154         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3155         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3156
3157         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3158         amdgpu_ring_write(ring, 0);
3159
3160         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3161         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3162         amdgpu_ring_write(ring, 0x8000);
3163         amdgpu_ring_write(ring, 0x8000);
3164
3165         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3166         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3167                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3168         amdgpu_ring_write(ring, tmp);
3169         amdgpu_ring_write(ring, 0);
3170
3171         amdgpu_ring_commit(ring);
3172
3173         return 0;
3174 }
3175
3176 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3177 {
3178         struct amdgpu_ring *ring;
3179         u32 tmp;
3180         u32 rb_bufsz;
3181         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3182
3183         /* Set the write pointer delay */
3184         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3185
3186         /* set the RB to use vmid 0 */
3187         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3188
3189         /* Set ring buffer size */
3190         ring = &adev->gfx.gfx_ring[0];
3191         rb_bufsz = order_base_2(ring->ring_size / 8);
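        /*
         * RB_BUFSZ gets log2 of the ring size in 8-byte units, e.g. a
         * 64 KiB ring gives order_base_2(65536 / 8) == 13.
         */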
3192         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3193         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3194 #ifdef __BIG_ENDIAN
3195         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3196 #endif
3197         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3198
3199         /* Initialize the ring buffer's write pointers */
3200         ring->wptr = 0;
3201         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3202         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3203
3204         /* set the wb address whether it's enabled or not */
3205         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3206         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3207         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3208
3209         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3210         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3211         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3212
3213         mdelay(1);
3214         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3215
3216         rb_addr = ring->gpu_addr >> 8;
3217         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3218         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3219
3220         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3221         if (ring->use_doorbell) {
3222                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3223                                     DOORBELL_OFFSET, ring->doorbell_index);
3224                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3225                                     DOORBELL_EN, 1);
3226         } else {
3227                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3228         }
3229         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3230
3231         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3232                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3233         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3234
3235         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3236                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3237
3238
3239         /* start the ring */
3240         gfx_v9_0_cp_gfx_start(adev);
3241         ring->sched.ready = true;
3242
3243         return 0;
3244 }
3245
3246 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3247 {
3248         int i;
3249
3250         if (enable) {
3251                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3252         } else {
3253                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3254                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3255                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3256                         adev->gfx.compute_ring[i].sched.ready = false;
3257                 adev->gfx.kiq.ring.sched.ready = false;
3258         }
3259         udelay(50);
3260 }
3261
3262 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3263 {
3264         const struct gfx_firmware_header_v1_0 *mec_hdr;
3265         const __le32 *fw_data;
3266         unsigned i;
3267         u32 tmp;
3268
3269         if (!adev->gfx.mec_fw)
3270                 return -EINVAL;
3271
3272         gfx_v9_0_cp_compute_enable(adev, false);
3273
3274         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3275         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3276
3277         fw_data = (const __le32 *)
3278                 (adev->gfx.mec_fw->data +
3279                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3280         tmp = 0;
3281         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3282         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3283         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3284
3285         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3286                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3287         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3288                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3289
3290         /* MEC1 */
3291         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3292                          mec_hdr->jt_offset);
3293         for (i = 0; i < mec_hdr->jt_size; i++)
3294                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3295                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3296
3297         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3298                         adev->gfx.mec_fw_version);
3299         /* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3300
3301         return 0;
3302 }
3303
3304 /* KIQ functions */
3305 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3306 {
3307         uint32_t tmp;
3308         struct amdgpu_device *adev = ring->adev;
3309
3310         /* tell RLC which queue is the KIQ */
3311         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3312         tmp &= 0xffffff00;
3313         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3314         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3315         tmp |= 0x80;
3316         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3317 }
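/*
 * The RLC_CP_SCHEDULERS write above packs the KIQ's identity into the low
 * byte as (me << 5 | pipe << 3 | queue); the second write with bit 7
 * (0x80) set appears to latch/activate that selection (inferred from the
 * two-step write sequence, not from register documentation).
 */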
3318
3319 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3320 {
3321         struct amdgpu_device *adev = ring->adev;
3322
3323         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3324                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
3325                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3326                         ring->has_high_prio = true;
3327                         mqd->cp_hqd_queue_priority =
3328                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3329                 } else {
3330                         ring->has_high_prio = false;
3331                 }
3332         }
3333 }
3334
3335 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3336 {
3337         struct amdgpu_device *adev = ring->adev;
3338         struct v9_mqd *mqd = ring->mqd_ptr;
3339         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3340         uint32_t tmp;
3341
3342         mqd->header = 0xC0310800;
3343         mqd->compute_pipelinestat_enable = 0x00000001;
3344         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3345         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3346         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3347         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3348         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3349         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3350         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3351         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3352         mqd->compute_misc_reserved = 0x00000003;
3353
3354         mqd->dynamic_cu_mask_addr_lo =
3355                 lower_32_bits(ring->mqd_gpu_addr
3356                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3357         mqd->dynamic_cu_mask_addr_hi =
3358                 upper_32_bits(ring->mqd_gpu_addr
3359                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3360
3361         eop_base_addr = ring->eop_gpu_addr >> 8;
3362         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3363         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3364
3365         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3366         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3367         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3368                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
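        /*
         * E.g. with GFX9_MEC_HPD_SIZE == 4096 bytes: 4096 / 4 = 1024 dwords,
         * order_base_2(1024) - 1 = 9, and 2^(9 + 1) = 1024 dwords, matching
         * the per-queue EOP buffer carved out of hpd_eop_gpu_addr in
         * gfx_v9_0_compute_ring_init().
         */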
3369
3370         mqd->cp_hqd_eop_control = tmp;
3371
3372         /* enable doorbell? */
3373         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3374
3375         if (ring->use_doorbell) {
3376                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3377                                     DOORBELL_OFFSET, ring->doorbell_index);
3378                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3379                                     DOORBELL_EN, 1);
3380                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3381                                     DOORBELL_SOURCE, 0);
3382                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3383                                     DOORBELL_HIT, 0);
3384         } else {
3385                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3386                                          DOORBELL_EN, 0);
3387         }
3388
3389         mqd->cp_hqd_pq_doorbell_control = tmp;
3390
3391         /* disable the queue if it's active */
3392         ring->wptr = 0;
3393         mqd->cp_hqd_dequeue_request = 0;
3394         mqd->cp_hqd_pq_rptr = 0;
3395         mqd->cp_hqd_pq_wptr_lo = 0;
3396         mqd->cp_hqd_pq_wptr_hi = 0;
3397
3398         /* set the pointer to the MQD */
3399         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3400         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3401
3402         /* set MQD vmid to 0 */
3403         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3404         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3405         mqd->cp_mqd_control = tmp;
3406
3407         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3408         hqd_gpu_addr = ring->gpu_addr >> 8;
3409         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3410         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3411
3412         /* set up the HQD, this is similar to CP_RB0_CNTL */
3413         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3414         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3415                             (order_base_2(ring->ring_size / 4) - 1));
3416         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3417                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3418 #ifdef __BIG_ENDIAN
3419         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3420 #endif
3421         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3422         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3423         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3424         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3425         mqd->cp_hqd_pq_control = tmp;
3426
3427         /* set the wb address whether it's enabled or not */
3428         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3429         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3430         mqd->cp_hqd_pq_rptr_report_addr_hi =
3431                 upper_32_bits(wb_gpu_addr) & 0xffff;
3432
3433         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3434         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3435         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3436         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3437
3438         tmp = 0;
3439         /* enable the doorbell if requested */
3440         if (ring->use_doorbell) {
3441                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3442                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3443                                 DOORBELL_OFFSET, ring->doorbell_index);
3444
3445                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3446                                          DOORBELL_EN, 1);
3447                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3448                                          DOORBELL_SOURCE, 0);
3449                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3450                                          DOORBELL_HIT, 0);
3451         }
3452
3453         mqd->cp_hqd_pq_doorbell_control = tmp;
3454
3455         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3456         ring->wptr = 0;
3457         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3458
3459         /* set the vmid for the queue */
3460         mqd->cp_hqd_vmid = 0;
3461
3462         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3463         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3464         mqd->cp_hqd_persistent_state = tmp;
3465
3466         /* set MIN_IB_AVAIL_SIZE */
3467         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3468         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3469         mqd->cp_hqd_ib_control = tmp;
3470
3471         /* set static priority for a queue/ring */
3472         gfx_v9_0_mqd_set_priority(ring, mqd);
3473         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
3474
3475         /* the map_queues packet doesn't need to activate the queue,
3476          * so only the KIQ needs to set this field.
3477          */
3478         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3479                 mqd->cp_hqd_active = 1;
3480
3481         return 0;
3482 }
3483
3484 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3485 {
3486         struct amdgpu_device *adev = ring->adev;
3487         struct v9_mqd *mqd = ring->mqd_ptr;
3488         int j;
3489
3490         /* disable wptr polling */
3491         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3492
3493         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3494                mqd->cp_hqd_eop_base_addr_lo);
3495         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3496                mqd->cp_hqd_eop_base_addr_hi);
3497
3498         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3499         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3500                mqd->cp_hqd_eop_control);
3501
3502         /* enable doorbell? */
3503         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3504                mqd->cp_hqd_pq_doorbell_control);
3505
3506         /* disable the queue if it's active */
3507         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3508                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3509                 for (j = 0; j < adev->usec_timeout; j++) {
3510                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3511                                 break;
3512                         udelay(1);
3513                 }
3514                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3515                        mqd->cp_hqd_dequeue_request);
3516                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3517                        mqd->cp_hqd_pq_rptr);
3518                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3519                        mqd->cp_hqd_pq_wptr_lo);
3520                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3521                        mqd->cp_hqd_pq_wptr_hi);
3522         }
3523
3524         /* set the pointer to the MQD */
3525         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3526                mqd->cp_mqd_base_addr_lo);
3527         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3528                mqd->cp_mqd_base_addr_hi);
3529
3530         /* set MQD vmid to 0 */
3531         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3532                mqd->cp_mqd_control);
3533
3534         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3535         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3536                mqd->cp_hqd_pq_base_lo);
3537         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3538                mqd->cp_hqd_pq_base_hi);
3539
3540         /* set up the HQD, this is similar to CP_RB0_CNTL */
3541         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3542                mqd->cp_hqd_pq_control);
3543
3544         /* set the wb address whether it's enabled or not */
3545         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3546                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3547         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3548                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3549
3550         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3551         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3552                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3553         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3554                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3555
3556         /* enable the doorbell if requested */
3557         if (ring->use_doorbell) {
3558                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3559                                         (adev->doorbell_index.kiq * 2) << 2);
3560                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3561                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3562         }
3563
3564         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3565                mqd->cp_hqd_pq_doorbell_control);
3566
3567         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3568         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3569                mqd->cp_hqd_pq_wptr_lo);
3570         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3571                mqd->cp_hqd_pq_wptr_hi);
3572
3573         /* set the vmid for the queue */
3574         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3575
3576         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3577                mqd->cp_hqd_persistent_state);
3578
3579         /* activate the queue */
3580         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3581                mqd->cp_hqd_active);
3582
3583         if (ring->use_doorbell)
3584                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3585
3586         return 0;
3587 }
3588
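/* Tear down the KIQ HQD: request a dequeue, poll CP_HQD_ACTIVE until the
 * queue goes idle (or the timeout expires), then clear the HQD registers.
 */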
3589 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3590 {
3591         struct amdgpu_device *adev = ring->adev;
3592         int j;
3593
3594         /* disable the queue if it's active */
3595         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3596
3597                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3598
3599                 for (j = 0; j < adev->usec_timeout; j++) {
3600                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3601                                 break;
3602                         udelay(1);
3603                 }
3604
3605                 if (j == adev->usec_timeout) {
3606                         DRM_DEBUG("KIQ dequeue request failed.\n");
3607
3608                         /* Manual disable if dequeue request times out */
3609                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3610                 }
3611
3612                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3613                       0);
3614         }
3615
3616         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3617         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3618         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3619         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3620         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3621         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3622         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3623         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3624
3625         return 0;
3626 }
3627
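/* Initialize the KIQ.  In the GPU reset case the saved MQD backup is
 * restored and only the HQD registers are reprogrammed; on a fresh init
 * the MQD is built from scratch and a backup copy is kept for later resets.
 */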
3628 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3629 {
3630         struct amdgpu_device *adev = ring->adev;
3631         struct v9_mqd *mqd = ring->mqd_ptr;
3632         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3633
3634         gfx_v9_0_kiq_setting(ring);
3635
3636         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3637                 /* reset MQD to a clean status */
3638                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3639                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3640
3641                 /* reset ring buffer */
3642                 ring->wptr = 0;
3643                 amdgpu_ring_clear_ring(ring);
3644
3645                 mutex_lock(&adev->srbm_mutex);
3646                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3647                 gfx_v9_0_kiq_init_register(ring);
3648                 soc15_grbm_select(adev, 0, 0, 0, 0);
3649                 mutex_unlock(&adev->srbm_mutex);
3650         } else {
3651                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3652                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3653                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3654                 mutex_lock(&adev->srbm_mutex);
3655                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3656                 gfx_v9_0_mqd_init(ring);
3657                 gfx_v9_0_kiq_init_register(ring);
3658                 soc15_grbm_select(adev, 0, 0, 0, 0);
3659                 mutex_unlock(&adev->srbm_mutex);
3660
3661                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3662                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3663         }
3664
3665         return 0;
3666 }
3667
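/* Initialize a compute queue (KCQ) MQD.  Unlike the KIQ, the HQD registers
 * are not written here; the queues are mapped later through the KIQ in
 * gfx_v9_0_kcq_resume() via amdgpu_gfx_enable_kcq().
 */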
3668 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3669 {
3670         struct amdgpu_device *adev = ring->adev;
3671         struct v9_mqd *mqd = ring->mqd_ptr;
3672         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3673
3674         if (!adev->in_gpu_reset && !adev->in_suspend) {
3675                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3676                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3677                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3678                 mutex_lock(&adev->srbm_mutex);
3679                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3680                 gfx_v9_0_mqd_init(ring);
3681                 soc15_grbm_select(adev, 0, 0, 0, 0);
3682                 mutex_unlock(&adev->srbm_mutex);
3683
3684                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3685                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3686         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3687                 /* reset MQD to a clean status */
3688                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3689                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3690
3691                 /* reset ring buffer */
3692                 ring->wptr = 0;
3693                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3694                 amdgpu_ring_clear_ring(ring);
3695         } else {
3696                 amdgpu_ring_clear_ring(ring);
3697         }
3698
3699         return 0;
3700 }
3701
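/* Map the KIQ MQD, program the queue directly through MMIO and mark the
 * ring as ready so the KIQ packet helpers can use it.
 */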
3702 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3703 {
3704         struct amdgpu_ring *ring;
3705         int r;
3706
3707         ring = &adev->gfx.kiq.ring;
3708
3709         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3710         if (unlikely(r != 0))
3711                 return r;
3712
3713         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3714         if (unlikely(r != 0))
3715                 return r;
3716
3717         gfx_v9_0_kiq_init_queue(ring);
3718         amdgpu_bo_kunmap(ring->mqd_obj);
3719         ring->mqd_ptr = NULL;
3720         amdgpu_bo_unreserve(ring->mqd_obj);
3721         ring->sched.ready = true;
3722         return 0;
3723 }
3724
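/* Enable the compute MEs, initialize the MQD of every compute ring and
 * then ask the KIQ to map all KCQs in one batch.
 */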
3725 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3726 {
3727         struct amdgpu_ring *ring = NULL;
3728         int r = 0, i;
3729
3730         gfx_v9_0_cp_compute_enable(adev, true);
3731
3732         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3733                 ring = &adev->gfx.compute_ring[i];
3734
3735                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3736                 if (unlikely(r != 0))
3737                         goto done;
3738                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3739                 if (!r) {
3740                         r = gfx_v9_0_kcq_init_queue(ring);
3741                         amdgpu_bo_kunmap(ring->mqd_obj);
3742                         ring->mqd_ptr = NULL;
3743                 }
3744                 amdgpu_bo_unreserve(ring->mqd_obj);
3745                 if (r)
3746                         goto done;
3747         }
3748
3749         r = amdgpu_gfx_enable_kcq(adev);
3750 done:
3751         return r;
3752 }
3753
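/* Bring up the command processor: load the CP microcode when PSP does not
 * handle firmware loading, resume the KIQ first, then the GFX ring (skipped
 * on Arcturus, which has no GFX ring) and the compute queues, and finally
 * ring-test everything.
 */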
3754 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3755 {
3756         int r, i;
3757         struct amdgpu_ring *ring;
3758
3759         if (!(adev->flags & AMD_IS_APU))
3760                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3761
3762         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3763                 if (adev->asic_type != CHIP_ARCTURUS) {
3764                         /* legacy firmware loading */
3765                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3766                         if (r)
3767                                 return r;
3768                 }
3769
3770                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3771                 if (r)
3772                         return r;
3773         }
3774
3775         r = gfx_v9_0_kiq_resume(adev);
3776         if (r)
3777                 return r;
3778
3779         if (adev->asic_type != CHIP_ARCTURUS) {
3780                 r = gfx_v9_0_cp_gfx_resume(adev);
3781                 if (r)
3782                         return r;
3783         }
3784
3785         r = gfx_v9_0_kcq_resume(adev);
3786         if (r)
3787                 return r;
3788
3789         if (adev->asic_type != CHIP_ARCTURUS) {
3790                 ring = &adev->gfx.gfx_ring[0];
3791                 r = amdgpu_ring_test_helper(ring);
3792                 if (r)
3793                         return r;
3794         }
3795
3796         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3797                 ring = &adev->gfx.compute_ring[i];
3798                 amdgpu_ring_test_helper(ring);
3799         }
3800
3801         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3802
3803         return 0;
3804 }
3805
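/* Arcturus only: mirror the DF hash status (64K/2M/1G) into TCP_ADDR_CONFIG. */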
3806 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3807 {
3808         u32 tmp;
3809
3810         if (adev->asic_type != CHIP_ARCTURUS)
3811                 return;
3812
3813         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3814         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3815                                 adev->df.hash_status.hash_64k);
3816         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3817                                 adev->df.hash_status.hash_2m);
3818         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3819                                 adev->df.hash_status.hash_1g);
3820         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3821 }
3822
3823 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3824 {
3825         if (adev->asic_type != CHIP_ARCTURUS)
3826                 gfx_v9_0_cp_gfx_enable(adev, enable);
3827         gfx_v9_0_cp_compute_enable(adev, enable);
3828 }
3829
3830 static int gfx_v9_0_hw_init(void *handle)
3831 {
3832         int r;
3833         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3834
3835         if (!amdgpu_sriov_vf(adev))
3836                 gfx_v9_0_init_golden_registers(adev);
3837
3838         gfx_v9_0_constants_init(adev);
3839
3840         gfx_v9_0_init_tcp_config(adev);
3841
3842         r = adev->gfx.rlc.funcs->resume(adev);
3843         if (r)
3844                 return r;
3845
3846         r = gfx_v9_0_cp_resume(adev);
3847         if (r)
3848                 return r;
3849
3850         return r;
3851 }
3852
3853 static int gfx_v9_0_hw_fini(void *handle)
3854 {
3855         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3856
3857         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3858         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3859         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3860
3861         /* DF freeze and KCQ disable will fail after a RAS interrupt has triggered */
3862         if (!amdgpu_ras_intr_triggered())
3863                 /* disable KCQs to avoid CPC touching memory that is no longer valid */
3864                 amdgpu_gfx_disable_kcq(adev);
3865
3866         if (amdgpu_sriov_vf(adev)) {
3867                 gfx_v9_0_cp_gfx_enable(adev, false);
3868                 /* polling must be disabled for SR-IOV once the hw is finished;
3869                  * otherwise the CPC engine may keep fetching a WB address that
3870                  * is no longer valid after sw teardown and trigger a DMAR read
3871                  * error on the hypervisor side.
3872                  */
3873                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3874                 return 0;
3875         }
3876
3877         /* Use the deinitialize sequence from CAIL when unbinding the device
3878          * from the driver, otherwise the KIQ hangs when binding it back
3879          */
3880         if (!adev->in_gpu_reset && !adev->in_suspend) {
3881                 mutex_lock(&adev->srbm_mutex);
3882                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3883                                 adev->gfx.kiq.ring.pipe,
3884                                 adev->gfx.kiq.ring.queue, 0);
3885                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3886                 soc15_grbm_select(adev, 0, 0, 0, 0);
3887                 mutex_unlock(&adev->srbm_mutex);
3888         }
3889
3890         gfx_v9_0_cp_enable(adev, false);
3891         adev->gfx.rlc.funcs->stop(adev);
3892
3893         return 0;
3894 }
3895
3896 static int gfx_v9_0_suspend(void *handle)
3897 {
3898         return gfx_v9_0_hw_fini(handle);
3899 }
3900
3901 static int gfx_v9_0_resume(void *handle)
3902 {
3903         return gfx_v9_0_hw_init(handle);
3904 }
3905
3906 static bool gfx_v9_0_is_idle(void *handle)
3907 {
3908         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3909
3910         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3911                                 GRBM_STATUS, GUI_ACTIVE))
3912                 return false;
3913         else
3914                 return true;
3915 }
3916
3917 static int gfx_v9_0_wait_for_idle(void *handle)
3918 {
3919         unsigned i;
3920         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3921
3922         for (i = 0; i < adev->usec_timeout; i++) {
3923                 if (gfx_v9_0_is_idle(handle))
3924                         return 0;
3925                 udelay(1);
3926         }
3927         return -ETIMEDOUT;
3928 }
3929
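/* Check the GRBM status registers for busy blocks and, when anything is
 * stuck, stop the RLC and CP before pulsing the matching GRBM_SOFT_RESET
 * bits.
 */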
3930 static int gfx_v9_0_soft_reset(void *handle)
3931 {
3932         u32 grbm_soft_reset = 0;
3933         u32 tmp;
3934         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3935
3936         /* GRBM_STATUS */
3937         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3938         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3939                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3940                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3941                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3942                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3943                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3944                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3945                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3946                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3947                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3948         }
3949
3950         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3951                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3952                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3953         }
3954
3955         /* GRBM_STATUS2 */
3956         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3957         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3958                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3959                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3960
3961
3962         if (grbm_soft_reset) {
3963                 /* stop the rlc */
3964                 adev->gfx.rlc.funcs->stop(adev);
3965
3966                 if (adev->asic_type != CHIP_ARCTURUS)
3967                         /* Disable GFX parsing/prefetching */
3968                         gfx_v9_0_cp_gfx_enable(adev, false);
3969
3970                 /* Disable MEC parsing/prefetching */
3971                 gfx_v9_0_cp_compute_enable(adev, false);
3972
3973                 if (grbm_soft_reset) {
3974                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3975                         tmp |= grbm_soft_reset;
3976                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3977                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3978                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3979
3980                         udelay(50);
3981
3982                         tmp &= ~grbm_soft_reset;
3983                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3984                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3985                 }
3986
3987                 /* Wait a little for things to settle down */
3988                 udelay(50);
3989         }
3990         return 0;
3991 }
3992
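/* Read the 64-bit GPU clock counter through the KIQ: a COPY_DATA packet
 * copies the register pair into a writeback slot that is then read back by
 * the CPU.  Used on Vega10 when running under SR-IOV, see
 * gfx_v9_0_get_gpu_clock_counter().
 */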
3993 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
3994 {
3995         signed long r, cnt = 0;
3996         unsigned long flags;
3997         uint32_t seq;
3998         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3999         struct amdgpu_ring *ring = &kiq->ring;
4000
4001         BUG_ON(!ring->funcs->emit_rreg);
4002
4003         spin_lock_irqsave(&kiq->ring_lock, flags);
4004         amdgpu_ring_alloc(ring, 32);
4005         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4006         amdgpu_ring_write(ring, 9 |     /* src: register*/
4007                                 (5 << 8) |      /* dst: memory */
4008                                 (1 << 16) |     /* count sel */
4009                                 (1 << 20));     /* write confirm */
4010         amdgpu_ring_write(ring, 0);
4011         amdgpu_ring_write(ring, 0);
4012         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4013                                 kiq->reg_val_offs * 4));
4014         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4015                                 kiq->reg_val_offs * 4));
4016         amdgpu_fence_emit_polling(ring, &seq);
4017         amdgpu_ring_commit(ring);
4018         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4019
4020         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4021
4022         /* don't wait any longer in the GPU reset case, because doing so may
4023          * block the gpu_recover() routine forever; e.g. this virt_kiq_rreg
4024          * is triggered from TTM and ttm_bo_lock_delayed_workqueue() will
4025          * never return if we keep waiting here, which causes gpu_recover()
4026          * to hang.
4027          *
4028          * also don't wait any longer when called from IRQ context
4029          */
4030         if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
4031                 goto failed_kiq_read;
4032
4033         might_sleep();
4034         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4035                 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4036                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4037         }
4038
4039         if (cnt > MAX_KIQ_REG_TRY)
4040                 goto failed_kiq_read;
4041
4042         return (uint64_t)adev->wb.wb[kiq->reg_val_offs] |
4043                 (uint64_t)adev->wb.wb[kiq->reg_val_offs + 1] << 32ULL;
4044
4045 failed_kiq_read:
4046         pr_err("failed to read gpu clock\n");
4047         return ~0;
4048 }
4049
4050 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4051 {
4052         uint64_t clock;
4053
4054         amdgpu_gfx_off_ctrl(adev, false);
4055         mutex_lock(&adev->gfx.gpu_clock_mutex);
4056         if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
4057                 clock = gfx_v9_0_kiq_read_clock(adev);
4058         } else {
4059                 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4060                 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4061                         ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4062         }
4063         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4064         amdgpu_gfx_off_ctrl(adev, true);
4065         return clock;
4066 }
4067
4068 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4069                                           uint32_t vmid,
4070                                           uint32_t gds_base, uint32_t gds_size,
4071                                           uint32_t gws_base, uint32_t gws_size,
4072                                           uint32_t oa_base, uint32_t oa_size)
4073 {
4074         struct amdgpu_device *adev = ring->adev;
4075
4076         /* GDS Base */
4077         gfx_v9_0_write_data_to_reg(ring, 0, false,
4078                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4079                                    gds_base);
4080
4081         /* GDS Size */
4082         gfx_v9_0_write_data_to_reg(ring, 0, false,
4083                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4084                                    gds_size);
4085
4086         /* GWS */
4087         gfx_v9_0_write_data_to_reg(ring, 0, false,
4088                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4089                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4090
4091         /* OA */
4092         gfx_v9_0_write_data_to_reg(ring, 0, false,
4093                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4094                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4095 }
4096
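/* Hand-assembled GFX9 compute shaders used by the EDC GPR workaround below
 * to initialize the VGPRs and SGPRs.
 */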
4097 static const u32 vgpr_init_compute_shader[] =
4098 {
4099         0xb07c0000, 0xbe8000ff,
4100         0x000000f8, 0xbf110800,
4101         0x7e000280, 0x7e020280,
4102         0x7e040280, 0x7e060280,
4103         0x7e080280, 0x7e0a0280,
4104         0x7e0c0280, 0x7e0e0280,
4105         0x80808800, 0xbe803200,
4106         0xbf84fff5, 0xbf9c0000,
4107         0xd28c0001, 0x0001007f,
4108         0xd28d0001, 0x0002027e,
4109         0x10020288, 0xb8810904,
4110         0xb7814000, 0xd1196a01,
4111         0x00000301, 0xbe800087,
4112         0xbefc00c1, 0xd89c4000,
4113         0x00020201, 0xd89cc080,
4114         0x00040401, 0x320202ff,
4115         0x00000800, 0x80808100,
4116         0xbf84fff8, 0x7e020280,
4117         0xbf810000, 0x00000000,
4118 };
4119
4120 static const u32 sgpr_init_compute_shader[] =
4121 {
4122         0xb07c0000, 0xbe8000ff,
4123         0x0000005f, 0xbee50080,
4124         0xbe812c65, 0xbe822c65,
4125         0xbe832c65, 0xbe842c65,
4126         0xbe852c65, 0xb77c0005,
4127         0x80808500, 0xbf84fff8,
4128         0xbe800080, 0xbf810000,
4129 };
4130
4131 /* When the register arrays below are changed, please update gpr_reg_size
4132    and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds() so
4133    that all gfx9 ASICs stay covered */
4134 static const struct soc15_reg_entry vgpr_init_regs[] = {
4135    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4136    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4137    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4138    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4139    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4140    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4141    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4142    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4143    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4144    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4145    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4146    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4147    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4148    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4149 };
4150
4151 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4152    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4153    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4154    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4155    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4156    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4157    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4158    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4159    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4160    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4161    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4162    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4163    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4164    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4165    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4166 };
4167
4168 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4169    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4170    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4171    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4172    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4173    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4174    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4175    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4176    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4177    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4178    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4179    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4180    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4181    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4182    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4183 };
4184
4185 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4186    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4187    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4188    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4189    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4190    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4191    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4192    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4193    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4194    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4195    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4196    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4197    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4198    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4199    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4200    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4201    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4202    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4203    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4204    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4205    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4206    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4207    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4208    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4209    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4210    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4211    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4212    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4213    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4214    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4215    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4216    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4217    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4218    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4219 };
4220
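/* RAS/EDC workaround: fill the whole GDS with zeroes through a CP DMA_DATA
 * packet on the first compute ring so the GDS memory starts from a known
 * state.  Only runs when GFX RAS is enabled.
 */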
4221 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4222 {
4223         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4224         int i, r;
4225
4226         /* only supported when RAS is enabled */
4227         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4228                 return 0;
4229
4230         r = amdgpu_ring_alloc(ring, 7);
4231         if (r) {
4232                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4233                         ring->name, r);
4234                 return r;
4235         }
4236
4237         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4238         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4239
4240         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4241         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4242                                 PACKET3_DMA_DATA_DST_SEL(1) |
4243                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4244                                 PACKET3_DMA_DATA_ENGINE(0)));
4245         amdgpu_ring_write(ring, 0);
4246         amdgpu_ring_write(ring, 0);
4247         amdgpu_ring_write(ring, 0);
4248         amdgpu_ring_write(ring, 0);
4249         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4250                                 adev->gds.gds_size);
4251
4252         amdgpu_ring_commit(ring);
4253
4254         for (i = 0; i < adev->usec_timeout; i++) {
4255                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4256                         break;
4257                 udelay(1);
4258         }
4259
4260         if (i >= adev->usec_timeout)
4261                 r = -ETIMEDOUT;
4262
4263         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4264
4265         return r;
4266 }
4267
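/* RAS/EDC workaround: build a single IB that dispatches the VGPR and the
 * two SGPR init shaders above across all shader engines so every GPR bank
 * gets written once, then wait for the resulting fence.
 */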
4268 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4269 {
4270         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4271         struct amdgpu_ib ib;
4272         struct dma_fence *f = NULL;
4273         int r, i;
4274         unsigned total_size, vgpr_offset, sgpr_offset;
4275         u64 gpu_addr;
4276
4277         int compute_dim_x = adev->gfx.config.max_shader_engines *
4278                                                 adev->gfx.config.max_cu_per_sh *
4279                                                 adev->gfx.config.max_sh_per_se;
4280         int sgpr_work_group_size = 5;
4281         int gpr_reg_size = compute_dim_x / 16 + 6;
4282
4283         /* only supported when RAS is enabled */
4284         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4285                 return 0;
4286
4287         /* bail if the compute ring is not ready */
4288         if (!ring->sched.ready)
4289                 return 0;
4290
4291         total_size =
4292                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4293         total_size +=
4294                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4295         total_size +=
4296                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4297         total_size = ALIGN(total_size, 256);
4298         vgpr_offset = total_size;
4299         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4300         sgpr_offset = total_size;
4301         total_size += sizeof(sgpr_init_compute_shader);
4302
4303         /* allocate an indirect buffer to put the commands in */
4304         memset(&ib, 0, sizeof(ib));
4305         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4306         if (r) {
4307                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4308                 return r;
4309         }
4310
4311         /* load the compute shaders */
4312         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4313                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4314
4315         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4316                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4317
4318         /* init the ib length to 0 */
4319         ib.length_dw = 0;
4320
4321         /* VGPR */
4322         /* write the register state for the compute dispatch */
4323         for (i = 0; i < gpr_reg_size; i++) {
4324                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4325                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4326                                                                 - PACKET3_SET_SH_REG_START;
4327                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4328         }
4329         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4330         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4331         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4332         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4333                                                         - PACKET3_SET_SH_REG_START;
4334         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4335         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4336
4337         /* write dispatch packet */
4338         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4339         ib.ptr[ib.length_dw++] = compute_dim_x; /* x */
4340         ib.ptr[ib.length_dw++] = 1; /* y */
4341         ib.ptr[ib.length_dw++] = 1; /* z */
4342         ib.ptr[ib.length_dw++] =
4343                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4344
4345         /* write CS partial flush packet */
4346         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4347         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4348
4349         /* SGPR1 */
4350         /* write the register state for the compute dispatch */
4351         for (i = 0; i < gpr_reg_size; i++) {
4352                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4353                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4354                                                                 - PACKET3_SET_SH_REG_START;
4355                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4356         }
4357         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4358         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4359         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4360         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4361                                                         - PACKET3_SET_SH_REG_START;
4362         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4363         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4364
4365         /* write dispatch packet */
4366         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4367         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4368         ib.ptr[ib.length_dw++] = 1; /* y */
4369         ib.ptr[ib.length_dw++] = 1; /* z */
4370         ib.ptr[ib.length_dw++] =
4371                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4372
4373         /* write CS partial flush packet */
4374         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4375         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4376
4377         /* SGPR2 */
4378         /* write the register state for the compute dispatch */
4379         for (i = 0; i < gpr_reg_size; i++) {
4380                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4381                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4382                                                                 - PACKET3_SET_SH_REG_START;
4383                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4384         }
4385         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4386         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4387         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4388         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4389                                                         - PACKET3_SET_SH_REG_START;
4390         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4391         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4392
4393         /* write dispatch packet */
4394         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4395         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4396         ib.ptr[ib.length_dw++] = 1; /* y */
4397         ib.ptr[ib.length_dw++] = 1; /* z */
4398         ib.ptr[ib.length_dw++] =
4399                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4400
4401         /* write CS partial flush packet */
4402         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4403         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4404
4405         /* schedule the ib on the ring */
4406         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4407         if (r) {
4408                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4409                 goto fail;
4410         }
4411
4412         /* wait for the GPU to finish processing the IB */
4413         r = dma_fence_wait(f, false);
4414         if (r) {
4415                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4416                 goto fail;
4417         }
4418
4419 fail:
4420         amdgpu_ib_free(adev, &ib, NULL);
4421         dma_fence_put(f);
4422
4423         return r;
4424 }
4425
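/* Set up ring counts and the per-block function pointers before the rest
 * of the gfx init runs.  Arcturus exposes no GFX rings, only compute.
 */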
4426 static int gfx_v9_0_early_init(void *handle)
4427 {
4428         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4429
4430         if (adev->asic_type == CHIP_ARCTURUS)
4431                 adev->gfx.num_gfx_rings = 0;
4432         else
4433                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4434         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4435         gfx_v9_0_set_kiq_pm4_funcs(adev);
4436         gfx_v9_0_set_ring_funcs(adev);
4437         gfx_v9_0_set_irq_funcs(adev);
4438         gfx_v9_0_set_gds_init(adev);
4439         gfx_v9_0_set_rlc_funcs(adev);
4440
4441         return 0;
4442 }
4443
4444 static int gfx_v9_0_ecc_late_init(void *handle)
4445 {
4446         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4447         int r;
4448
4449         /*
4450          * Temporary workaround: on several cards the CP firmware fails to
4451          * update the read pointer while CPDMA is writing the clearing
4452          * operation to GDS during the suspend/resume sequence, so limit
4453          * this operation to the cold boot sequence.
4454          */
4455         if (!adev->in_suspend) {
4456                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4457                 if (r)
4458                         return r;
4459         }
4460
4461         /* requires IBs so do in late init after IB pool is initialized */
4462         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4463         if (r)
4464                 return r;
4465
4466         if (adev->gfx.funcs &&
4467             adev->gfx.funcs->reset_ras_error_count)
4468                 adev->gfx.funcs->reset_ras_error_count(adev);
4469
4470         r = amdgpu_gfx_ras_late_init(adev);
4471         if (r)
4472                 return r;
4473
4474         return 0;
4475 }
4476
4477 static int gfx_v9_0_late_init(void *handle)
4478 {
4479         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4480         int r;
4481
4482         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4483         if (r)
4484                 return r;
4485
4486         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4487         if (r)
4488                 return r;
4489
4490         r = gfx_v9_0_ecc_late_init(handle);
4491         if (r)
4492                 return r;
4493
4494         return 0;
4495 }
4496
4497 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4498 {
4499         uint32_t rlc_setting;
4500
4501         /* if RLC is not enabled, do nothing */
4502         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4503         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4504                 return false;
4505
4506         return true;
4507 }
4508
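/* Request RLC safe mode by writing the CMD/MESSAGE fields and poll until
 * the RLC acknowledges by clearing the CMD bit.
 */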
4509 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4510 {
4511         uint32_t data;
4512         unsigned i;
4513
4514         data = RLC_SAFE_MODE__CMD_MASK;
4515         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4516         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4517
4518         /* wait for RLC_SAFE_MODE */
4519         for (i = 0; i < adev->usec_timeout; i++) {
4520                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4521                         break;
4522                 udelay(1);
4523         }
4524 }
4525
4526 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4527 {
4528         uint32_t data;
4529
4530         data = RLC_SAFE_MODE__CMD_MASK;
4531         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4532 }
4533
4534 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4535                                                 bool enable)
4536 {
4537         amdgpu_gfx_rlc_enter_safe_mode(adev);
4538
4539         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4540                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4541                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4542                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4543         } else {
4544                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4545                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4546                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4547         }
4548
4549         amdgpu_gfx_rlc_exit_safe_mode(adev);
4550 }
4551
4552 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4553                                                 bool enable)
4554 {
4555         /* TODO: double check whether this needs to be performed under safe mode */
4556         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4557
4558         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4559                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4560         else
4561                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4562
4563         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4564                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4565         else
4566                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4567
4568         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4569 }
4570
4571 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4572                                                       bool enable)
4573 {
4574         uint32_t data, def;
4575
4576         amdgpu_gfx_rlc_enter_safe_mode(adev);
4577
4578         /* It is disabled by HW by default */
4579         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4580                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4581                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4582
4583                 if (adev->asic_type != CHIP_VEGA12)
4584                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4585
4586                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4587                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4588                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4589
4590                 /* only for Vega10 & Raven1 */
4591                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4592
4593                 if (def != data)
4594                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4595
4596                 /* MGLS is a global flag to control all MGLS in GFX */
4597                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4598                         /* 2 - RLC memory Light sleep */
4599                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4600                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4601                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4602                                 if (def != data)
4603                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4604                         }
4605                         /* 3 - CP memory Light sleep */
4606                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4607                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4608                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4609                                 if (def != data)
4610                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4611                         }
4612                 }
4613         } else {
4614                 /* 1 - MGCG_OVERRIDE */
4615                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4616
4617                 if (adev->asic_type != CHIP_VEGA12)
4618                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4619
4620                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4621                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4622                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4623                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4624
4625                 if (def != data)
4626                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4627
4628                 /* 2 - disable MGLS in RLC */
4629                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4630                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4631                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4632                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4633                 }
4634
4635                 /* 3 - disable MGLS in CP */
4636                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4637                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4638                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4639                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4640                 }
4641         }
4642
4643         amdgpu_gfx_rlc_exit_safe_mode(adev);
4644 }
4645
4646 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4647                                            bool enable)
4648 {
4649         uint32_t data, def;
4650
4651         if (adev->asic_type == CHIP_ARCTURUS)
4652                 return;
4653
4654         amdgpu_gfx_rlc_enter_safe_mode(adev);
4655
4656         /* Enable 3D CGCG/CGLS */
4657         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4658                 /* write cmd to clear cgcg/cgls ov */
4659                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4660                 /* unset CGCG override */
4661                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4662                 /* update CGCG and CGLS override bits */
4663                 if (def != data)
4664                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4665
4666                 /* enable 3Dcgcg FSM(0x0000363f) */
4667                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4668
4669                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4670                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4671                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4672                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4673                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4674                 if (def != data)
4675                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4676
4677                 /* set IDLE_POLL_COUNT(0x00900100) */
4678                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4679                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4680                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4681                 if (def != data)
4682                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4683         } else {
4684                 /* Disable CGCG/CGLS */
4685                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4686                 /* disable cgcg, cgls should be disabled */
4687                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4688                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4689                 /* disable cgcg and cgls in FSM */
4690                 if (def != data)
4691                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4692         }
4693
4694         amdgpu_gfx_rlc_exit_safe_mode(adev);
4695 }
4696
4697 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4698                                                       bool enable)
4699 {
4700         uint32_t def, data;
4701
4702         amdgpu_gfx_rlc_enter_safe_mode(adev);
4703
4704         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4705                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4706                 /* unset CGCG override */
4707                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4708                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4709                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4710                 else
4711                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4712                 /* update CGCG and CGLS override bits */
4713                 if (def != data)
4714                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4715
4716                 /* enable cgcg FSM(0x0000363F) */
4717                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4718
4719                 if (adev->asic_type == CHIP_ARCTURUS)
4720                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4721                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4722                 else
4723                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4724                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4725                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4726                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4727                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4728                 if (def != data)
4729                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4730
4731                 /* set IDLE_POLL_COUNT(0x00900100) */
4732                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4733                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4734                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4735                 if (def != data)
4736                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4737         } else {
4738                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4739                 /* reset CGCG/CGLS bits */
4740                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4741                 /* disable cgcg and cgls in FSM */
4742                 if (def != data)
4743                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4744         }
4745
4746         amdgpu_gfx_rlc_exit_safe_mode(adev);
4747 }
4748
4749 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4750                                             bool enable)
4751 {
4752         if (enable) {
4753                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4754                  * ===  MGCG + MGLS ===
4755                  */
4756                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4757                 /* ===  CGCG /CGLS for GFX 3D Only === */
4758                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4759                 /* ===  CGCG + CGLS === */
4760                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4761         } else {
4762                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4763                  * ===  CGCG + CGLS ===
4764                  */
4765                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4766                 /* ===  CGCG /CGLS for GFX 3D Only === */
4767                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4768                 /* ===  MGCG + MGLS === */
4769                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4770         }
4771         return 0;
4772 }
4773
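     /* Select which VMID the RLC streaming performance monitor (SPM) uses
      * for its memory transactions.
      */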
4774 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
4775 {
4776         u32 data;
4777
4778         data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
4779
4780         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4781         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4782
4783         WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
4784 }
4785
4786 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4787         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4788         .set_safe_mode = gfx_v9_0_set_safe_mode,
4789         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4790         .init = gfx_v9_0_rlc_init,
4791         .get_csb_size = gfx_v9_0_get_csb_size,
4792         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4793         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4794         .resume = gfx_v9_0_rlc_resume,
4795         .stop = gfx_v9_0_rlc_stop,
4796         .reset = gfx_v9_0_rlc_reset,
4797         .start = gfx_v9_0_rlc_start,
4798         .update_spm_vmid = gfx_v9_0_update_spm_vmid
4799 };
4800
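     /* For Raven/Renoir, GFXOFF is disabled (and any pending delayed enable
      * cancelled) while power gating is being torn down, the RLC/SMU
      * handshake, CP power gating and gfx CGPG/MGPG state are reprogrammed,
      * and GFXOFF is re-enabled once gating is turned back on.
      */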
4801 static int gfx_v9_0_set_powergating_state(void *handle,
4802                                           enum amd_powergating_state state)
4803 {
4804         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4805         bool enable = (state == AMD_PG_STATE_GATE);
4806
4807         switch (adev->asic_type) {
4808         case CHIP_RAVEN:
4809         case CHIP_RENOIR:
4810                 if (!enable) {
4811                         amdgpu_gfx_off_ctrl(adev, false);
4812                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4813                 }
4814                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4815                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4816                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4817                 } else {
4818                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4819                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4820                 }
4821
4822                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4823                         gfx_v9_0_enable_cp_power_gating(adev, true);
4824                 else
4825                         gfx_v9_0_enable_cp_power_gating(adev, false);
4826
4827                 /* update gfx cgpg state */
4828                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4829
4830                 /* update mgcg state */
4831                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4832
4833                 if (enable)
4834                         amdgpu_gfx_off_ctrl(adev, true);
4835                 break;
4836         case CHIP_VEGA12:
4837                 if (!enable) {
4838                         amdgpu_gfx_off_ctrl(adev, false);
4839                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4840                 } else {
4841                         amdgpu_gfx_off_ctrl(adev, true);
4842                 }
4843                 break;
4844         default:
4845                 break;
4846         }
4847
4848         return 0;
4849 }
4850
4851 static int gfx_v9_0_set_clockgating_state(void *handle,
4852                                           enum amd_clockgating_state state)
4853 {
4854         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4855
4856         if (amdgpu_sriov_vf(adev))
4857                 return 0;
4858
4859         switch (adev->asic_type) {
4860         case CHIP_VEGA10:
4861         case CHIP_VEGA12:
4862         case CHIP_VEGA20:
4863         case CHIP_RAVEN:
4864         case CHIP_ARCTURUS:
4865         case CHIP_RENOIR:
4866                 gfx_v9_0_update_gfx_clock_gating(adev,
4867                                                  state == AMD_CG_STATE_GATE);
4868                 break;
4869         default:
4870                 break;
4871         }
4872         return 0;
4873 }
4874
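     /* Report which clock-gating features are currently active by sampling
      * the relevant RLC and CP gating control registers.
      */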
4875 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4876 {
4877         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4878         int data;
4879
4880         if (amdgpu_sriov_vf(adev))
4881                 *flags = 0;
4882
4883         /* AMD_CG_SUPPORT_GFX_MGCG */
4884         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
4885         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4886                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4887
4888         /* AMD_CG_SUPPORT_GFX_CGCG */
4889         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
4890         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4891                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4892
4893         /* AMD_CG_SUPPORT_GFX_CGLS */
4894         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4895                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4896
4897         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4898         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
4899         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4900                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4901
4902         /* AMD_CG_SUPPORT_GFX_CP_LS */
4903         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
4904         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4905                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4906
4907         if (adev->asic_type != CHIP_ARCTURUS) {
4908                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4909                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
4910                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4911                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4912
4913                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4914                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4915                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4916         }
4917 }
4918
4919 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4920 {
4921         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4922 }
4923
4924 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4925 {
4926         struct amdgpu_device *adev = ring->adev;
4927         u64 wptr;
4928
4929         /* XXX check if swapping is necessary on BE */
4930         if (ring->use_doorbell) {
4931                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4932         } else {
4933                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4934                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4935         }
4936
4937         return wptr;
4938 }
4939
4940 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4941 {
4942         struct amdgpu_device *adev = ring->adev;
4943
4944         if (ring->use_doorbell) {
4945                 /* XXX check if swapping is necessary on BE */
4946                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4947                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4948         } else {
4949                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4950                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4951         }
4952 }
4953
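     /* Emit a packet that requests an HDP flush from the NBIO and waits for
      * the matching "done" bit; ref_and_mask selects the bit assigned to the
      * requesting CP engine/pipe.
      */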
4954 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4955 {
4956         struct amdgpu_device *adev = ring->adev;
4957         u32 ref_and_mask, reg_mem_engine;
4958         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4959
4960         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4961                 switch (ring->me) {
4962                 case 1:
4963                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4964                         break;
4965                 case 2:
4966                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4967                         break;
4968                 default:
4969                         return;
4970                 }
4971                 reg_mem_engine = 0;
4972         } else {
4973                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4974                 reg_mem_engine = 1; /* pfp */
4975         }
4976
4977         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4978                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4979                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4980                               ref_and_mask, ref_and_mask, 0x20);
4981 }
4982
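     /* Emit an INDIRECT_BUFFER packet for a gfx IB.  Constant engine IBs use
      * INDIRECT_BUFFER_CONST; preemptible IBs under SR-IOV also get the
      * PRE_ENB bit plus a DE metadata write so they can be preempted.
      */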
4983 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4984                                         struct amdgpu_job *job,
4985                                         struct amdgpu_ib *ib,
4986                                         uint32_t flags)
4987 {
4988         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4989         u32 header, control = 0;
4990
4991         if (ib->flags & AMDGPU_IB_FLAG_CE)
4992                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4993         else
4994                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4995
4996         control |= ib->length_dw | (vmid << 24);
4997
4998         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4999                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5000
5001                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5002                         gfx_v9_0_ring_emit_de_meta(ring);
5003         }
5004
5005         amdgpu_ring_write(ring, header);
5006         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5007         amdgpu_ring_write(ring,
5008 #ifdef __BIG_ENDIAN
5009                 (2 << 0) |
5010 #endif
5011                 lower_32_bits(ib->gpu_addr));
5012         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5013         amdgpu_ring_write(ring, control);
5014 }
5015
5016 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5017                                           struct amdgpu_job *job,
5018                                           struct amdgpu_ib *ib,
5019                                           uint32_t flags)
5020 {
5021         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5022         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5023
5024         /* Currently, there is a high probability of a wave ID mismatch
5025          * between ME and GDS, leading to a hw deadlock, because ME generates
5026          * different wave IDs than the GDS expects. This situation happens
5027          * randomly when at least 5 compute pipes use GDS ordered append.
5028          * The wave IDs generated by ME are also wrong after suspend/resume.
5029          * Those are probably bugs somewhere else in the kernel driver.
5030          *
5031          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5032          * GDS to 0 for this ring (me/pipe).
5033          */
5034         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5035                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5036                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5037                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5038         }
5039
5040         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5041         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5042         amdgpu_ring_write(ring,
5043 #ifdef __BIG_ENDIAN
5044                                 (2 << 0) |
5045 #endif
5046                                 lower_32_bits(ib->gpu_addr));
5047         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5048         amdgpu_ring_write(ring, control);
5049 }
5050
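     /* Emit a RELEASE_MEM packet that flushes/invalidates the GPU caches,
      * writes the fence sequence number to memory and optionally raises an
      * interrupt.
      */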
5051 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5052                                      u64 seq, unsigned flags)
5053 {
5054         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5055         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5056         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5057
5058         /* RELEASE_MEM - flush caches, send int */
5059         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5060         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5061                                                EOP_TC_NC_ACTION_EN) :
5062                                               (EOP_TCL1_ACTION_EN |
5063                                                EOP_TC_ACTION_EN |
5064                                                EOP_TC_WB_ACTION_EN |
5065                                                EOP_TC_MD_ACTION_EN)) |
5066                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5067                                  EVENT_INDEX(5)));
5068         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5069
5070         /*
5071          * the address must be Qword aligned for a 64-bit write, and Dword
5072          * aligned when only the low 32 bits are written (data high is discarded)
5073          */
5074         if (write64bit)
5075                 BUG_ON(addr & 0x7);
5076         else
5077                 BUG_ON(addr & 0x3);
5078         amdgpu_ring_write(ring, lower_32_bits(addr));
5079         amdgpu_ring_write(ring, upper_32_bits(addr));
5080         amdgpu_ring_write(ring, lower_32_bits(seq));
5081         amdgpu_ring_write(ring, upper_32_bits(seq));
5082         amdgpu_ring_write(ring, 0);
5083 }
5084
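     /* Wait until the ring's last emitted fence sequence number is visible
      * in memory before any following packets are processed.
      */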
5085 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5086 {
5087         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5088         uint32_t seq = ring->fence_drv.sync_seq;
5089         uint64_t addr = ring->fence_drv.gpu_addr;
5090
5091         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5092                               lower_32_bits(addr), upper_32_bits(addr),
5093                               seq, 0xffffffff, 4);
5094 }
5095
5096 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5097                                         unsigned vmid, uint64_t pd_addr)
5098 {
5099         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5100
5101         /* compute doesn't have PFP */
5102         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5103                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5104                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5105                 amdgpu_ring_write(ring, 0x0);
5106         }
5107 }
5108
5109 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5110 {
5111         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5112 }
5113
5114 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5115 {
5116         u64 wptr;
5117
5118         /* XXX check if swapping is necessary on BE */
5119         if (ring->use_doorbell)
5120                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5121         else
5122                 BUG();
5123         return wptr;
5124 }
5125
5126 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5127 {
5128         struct amdgpu_device *adev = ring->adev;
5129
5130         /* XXX check if swapping is necessary on BE */
5131         if (ring->use_doorbell) {
5132                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5133                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5134         } else {
5135                 BUG(); /* only DOORBELL method supported on gfx9 now */
5136         }
5137 }
5138
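     /* KIQ fence: write a 32-bit sequence number with WRITE_DATA and, when
      * requested, poke CPC_INT_STATUS to raise the interrupt (src_id 178).
      */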
5139 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5140                                          u64 seq, unsigned int flags)
5141 {
5142         struct amdgpu_device *adev = ring->adev;
5143
5144         /* we only allocate 32bit for each seq wb address */
5145         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5146
5147         /* write fence seq to the "addr" */
5148         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5149         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5150                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5151         amdgpu_ring_write(ring, lower_32_bits(addr));
5152         amdgpu_ring_write(ring, upper_32_bits(addr));
5153         amdgpu_ring_write(ring, lower_32_bits(seq));
5154
5155         if (flags & AMDGPU_FENCE_FLAG_INT) {
5156                 /* set register to trigger INT */
5157                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5158                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5159                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5160                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5161                 amdgpu_ring_write(ring, 0);
5162                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5163         }
5164 }
5165
5166 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5167 {
5168         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5169         amdgpu_ring_write(ring, 0);
5170 }
5171
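     /* Write a zero-initialized constant engine metadata payload into the
      * per-context save area (CSA); emitted for SR-IOV from emit_cntxcntl.
      */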
5172 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5173 {
5174         struct v9_ce_ib_state ce_payload = {0};
5175         uint64_t csa_addr;
5176         int cnt;
5177
5178         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5179         csa_addr = amdgpu_csa_vaddr(ring->adev);
5180
5181         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5182         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5183                                  WRITE_DATA_DST_SEL(8) |
5184                                  WR_CONFIRM) |
5185                                  WRITE_DATA_CACHE_POLICY(0));
5186         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5187         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5188         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5189 }
5190
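     /* Write the draw engine metadata payload (including the GDS backup
      * address) into the CSA; emitted ahead of preemptible IBs.
      */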
5191 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5192 {
5193         struct v9_de_ib_state de_payload = {0};
5194         uint64_t csa_addr, gds_addr;
5195         int cnt;
5196
5197         csa_addr = amdgpu_csa_vaddr(ring->adev);
5198         gds_addr = csa_addr + 4096;
5199         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5200         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5201
5202         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5203         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5204         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5205                                  WRITE_DATA_DST_SEL(8) |
5206                                  WR_CONFIRM) |
5207                                  WRITE_DATA_CACHE_POLICY(0));
5208         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5209         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5210         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5211 }
5212
5213 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5214 {
5215         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5216         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5217 }
5218
5219 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5220 {
5221         uint32_t dw2 = 0;
5222
5223         if (amdgpu_sriov_vf(ring->adev))
5224                 gfx_v9_0_ring_emit_ce_meta(ring);
5225
5226         gfx_v9_0_ring_emit_tmz(ring, true);
5227
5228         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5229         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5230                 /* set load_global_config & load_global_uconfig */
5231                 dw2 |= 0x8001;
5232                 /* set load_cs_sh_regs */
5233                 dw2 |= 0x01000000;
5234                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5235                 dw2 |= 0x10002;
5236
5237                 /* set load_ce_ram if preamble presented */
5238                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5239                         dw2 |= 0x10000000;
5240         } else {
5241                 /* still load_ce_ram if this is the first time a preamble is
5242                  * presented, even though no context switch happens.
5243                  */
5244                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5245                         dw2 |= 0x10000000;
5246         }
5247
5248         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5249         amdgpu_ring_write(ring, dw2);
5250         amdgpu_ring_write(ring, 0);
5251 }
5252
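     /* Emit a COND_EXEC packet with a placeholder DW count; the real count
      * is patched in by gfx_v9_0_ring_emit_patch_cond_exec() once the size
      * of the conditional section is known.
      */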
5253 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5254 {
5255         unsigned ret;
5256         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5257         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5258         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5259         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5260         ret = ring->wptr & ring->buf_mask;
5261         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5262         return ret;
5263 }
5264
5265 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5266 {
5267         unsigned cur;
5268         BUG_ON(offset > ring->buf_mask);
5269         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5270
5271         cur = (ring->wptr & ring->buf_mask) - 1;
5272         if (likely(cur > offset))
5273                 ring->ring[offset] = cur - offset;
5274         else
5275                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5276 }
5277
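     /* Emit a COPY_DATA packet that copies a register value into the KIQ
      * writeback slot (reg_val_offs); used to read registers through the KIQ.
      */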
5278 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5279 {
5280         struct amdgpu_device *adev = ring->adev;
5281         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5282
5283         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5284         amdgpu_ring_write(ring, 0 |     /* src: register*/
5285                                 (5 << 8) |      /* dst: memory */
5286                                 (1 << 20));     /* write confirm */
5287         amdgpu_ring_write(ring, reg);
5288         amdgpu_ring_write(ring, 0);
5289         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5290                                 kiq->reg_val_offs * 4));
5291         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5292                                 kiq->reg_val_offs * 4));
5293 }
5294
5295 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5296                                     uint32_t val)
5297 {
5298         uint32_t cmd = 0;
5299
5300         switch (ring->funcs->type) {
5301         case AMDGPU_RING_TYPE_GFX:
5302                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5303                 break;
5304         case AMDGPU_RING_TYPE_KIQ:
5305                 cmd = (1 << 16); /* no inc addr */
5306                 break;
5307         default:
5308                 cmd = WR_CONFIRM;
5309                 break;
5310         }
5311         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5312         amdgpu_ring_write(ring, cmd);
5313         amdgpu_ring_write(ring, reg);
5314         amdgpu_ring_write(ring, 0);
5315         amdgpu_ring_write(ring, val);
5316 }
5317
5318 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5319                                         uint32_t val, uint32_t mask)
5320 {
5321         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5322 }
5323
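     /* Write reg0 and wait for reg1 to match ref/mask.  The combined
      * WAIT_REG_MEM form is used only when the CP firmware supports it;
      * otherwise fall back to separate write and wait packets.
      */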
5324 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5325                                                   uint32_t reg0, uint32_t reg1,
5326                                                   uint32_t ref, uint32_t mask)
5327 {
5328         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5329         struct amdgpu_device *adev = ring->adev;
5330         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5331                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5332
5333         if (fw_version_ok)
5334                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5335                                       ref, mask, 0x20);
5336         else
5337                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5338                                                            ref, mask);
5339 }
5340
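     /* Soft ring recovery: ask the SQ to kill the waves belonging to the
      * guilty VMID so the ring can recover without a full GPU reset.
      */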
5341 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5342 {
5343         struct amdgpu_device *adev = ring->adev;
5344         uint32_t value = 0;
5345
5346         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5347         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5348         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5349         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5350         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5351 }
5352
5353 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5354                                                  enum amdgpu_interrupt_state state)
5355 {
5356         switch (state) {
5357         case AMDGPU_IRQ_STATE_DISABLE:
5358         case AMDGPU_IRQ_STATE_ENABLE:
5359                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5360                                TIME_STAMP_INT_ENABLE,
5361                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5362                 break;
5363         default:
5364                 break;
5365         }
5366 }
5367
5368 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5369                                                      int me, int pipe,
5370                                                      enum amdgpu_interrupt_state state)
5371 {
5372         u32 mec_int_cntl, mec_int_cntl_reg;
5373
5374         /*
5375          * amdgpu controls only the first MEC. That's why this function only
5376          * handles the setting of interrupts for this specific MEC. All other
5377          * pipes' interrupts are set by amdkfd.
5378          */
5379
5380         if (me == 1) {
5381                 switch (pipe) {
5382                 case 0:
5383                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5384                         break;
5385                 case 1:
5386                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5387                         break;
5388                 case 2:
5389                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5390                         break;
5391                 case 3:
5392                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5393                         break;
5394                 default:
5395                         DRM_DEBUG("invalid pipe %d\n", pipe);
5396                         return;
5397                 }
5398         } else {
5399                 DRM_DEBUG("invalid me %d\n", me);
5400                 return;
5401         }
5402
5403         switch (state) {
5404         case AMDGPU_IRQ_STATE_DISABLE:
5405                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5406                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5407                                              TIME_STAMP_INT_ENABLE, 0);
5408                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5409                 break;
5410         case AMDGPU_IRQ_STATE_ENABLE:
5411                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5412                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5413                                              TIME_STAMP_INT_ENABLE, 1);
5414                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5415                 break;
5416         default:
5417                 break;
5418         }
5419 }
5420
5421 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5422                                              struct amdgpu_irq_src *source,
5423                                              unsigned type,
5424                                              enum amdgpu_interrupt_state state)
5425 {
5426         switch (state) {
5427         case AMDGPU_IRQ_STATE_DISABLE:
5428         case AMDGPU_IRQ_STATE_ENABLE:
5429                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5430                                PRIV_REG_INT_ENABLE,
5431                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5432                 break;
5433         default:
5434                 break;
5435         }
5436
5437         return 0;
5438 }
5439
5440 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5441                                               struct amdgpu_irq_src *source,
5442                                               unsigned type,
5443                                               enum amdgpu_interrupt_state state)
5444 {
5445         switch (state) {
5446         case AMDGPU_IRQ_STATE_DISABLE:
5447         case AMDGPU_IRQ_STATE_ENABLE:
5448                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5449                                PRIV_INSTR_INT_ENABLE,
5450                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
5451         default:
5452                 break;
5453         }
5454
5455         return 0;
5456 }
5457
5458 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5459         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5460                         CP_ECC_ERROR_INT_ENABLE, 1)
5461
5462 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5463         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5464                         CP_ECC_ERROR_INT_ENABLE, 0)
5465
5466 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5467                                               struct amdgpu_irq_src *source,
5468                                               unsigned type,
5469                                               enum amdgpu_interrupt_state state)
5470 {
5471         switch (state) {
5472         case AMDGPU_IRQ_STATE_DISABLE:
5473                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5474                                 CP_ECC_ERROR_INT_ENABLE, 0);
5475                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5476                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5477                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5478                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5479                 break;
5480
5481         case AMDGPU_IRQ_STATE_ENABLE:
5482                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5483                                 CP_ECC_ERROR_INT_ENABLE, 1);
5484                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5485                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5486                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5487                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5488                 break;
5489         default:
5490                 break;
5491         }
5492
5493         return 0;
5494 }
5495
5496
5497 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5498                                             struct amdgpu_irq_src *src,
5499                                             unsigned type,
5500                                             enum amdgpu_interrupt_state state)
5501 {
5502         switch (type) {
5503         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5504                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5505                 break;
5506         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5507                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5508                 break;
5509         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5510                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5511                 break;
5512         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5513                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5514                 break;
5515         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5516                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5517                 break;
5518         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5519                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5520                 break;
5521         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5522                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5523                 break;
5524         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5525                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5526                 break;
5527         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5528                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5529                 break;
5530         default:
5531                 break;
5532         }
5533         return 0;
5534 }
5535
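     /* End-of-pipe interrupt handler: decode me/pipe/queue from ring_id and
      * process fences on the gfx ring or the matching compute ring.
      */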
5536 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5537                             struct amdgpu_irq_src *source,
5538                             struct amdgpu_iv_entry *entry)
5539 {
5540         int i;
5541         u8 me_id, pipe_id, queue_id;
5542         struct amdgpu_ring *ring;
5543
5544         DRM_DEBUG("IH: CP EOP\n");
5545         me_id = (entry->ring_id & 0x0c) >> 2;
5546         pipe_id = (entry->ring_id & 0x03) >> 0;
5547         queue_id = (entry->ring_id & 0x70) >> 4;
5548
5549         switch (me_id) {
5550         case 0:
5551                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5552                 break;
5553         case 1:
5554         case 2:
5555                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5556                         ring = &adev->gfx.compute_ring[i];
5557                         /* Per-queue interrupt is supported for MEC starting from VI.
5558                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
5559                           */
5560                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5561                                 amdgpu_fence_process(ring);
5562                 }
5563                 break;
5564         }
5565         return 0;
5566 }
5567
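     /* Common handler for CP fault interrupts: report a scheduler fault on
      * the ring that generated the interrupt.
      */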
5568 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5569                            struct amdgpu_iv_entry *entry)
5570 {
5571         u8 me_id, pipe_id, queue_id;
5572         struct amdgpu_ring *ring;
5573         int i;
5574
5575         me_id = (entry->ring_id & 0x0c) >> 2;
5576         pipe_id = (entry->ring_id & 0x03) >> 0;
5577         queue_id = (entry->ring_id & 0x70) >> 4;
5578
5579         switch (me_id) {
5580         case 0:
5581                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5582                 break;
5583         case 1:
5584         case 2:
5585                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5586                         ring = &adev->gfx.compute_ring[i];
5587                         if (ring->me == me_id && ring->pipe == pipe_id &&
5588                             ring->queue == queue_id)
5589                                 drm_sched_fault(&ring->sched);
5590                 }
5591                 break;
5592         }
5593 }
5594
5595 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5596                                  struct amdgpu_irq_src *source,
5597                                  struct amdgpu_iv_entry *entry)
5598 {
5599         DRM_ERROR("Illegal register access in command stream\n");
5600         gfx_v9_0_fault(adev, entry);
5601         return 0;
5602 }
5603
5604 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5605                                   struct amdgpu_irq_src *source,
5606                                   struct amdgpu_iv_entry *entry)
5607 {
5608         DRM_ERROR("Illegal instruction in command stream\n");
5609         gfx_v9_0_fault(adev, entry);
5610         return 0;
5611 }
5612
5613
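     /* EDC error counter registers and fields reported for GFX RAS.  For
      * each entry the first SOC15_REG_FIELD is the correctable (SEC/SED)
      * count and the second the uncorrectable (DED) count, 0 if the block
      * has no such counter.
      */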
5614 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5615         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5616           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5617           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5618         },
5619         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5620           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5621           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5622         },
5623         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5624           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5625           0, 0
5626         },
5627         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5628           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5629           0, 0
5630         },
5631         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5632           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5633           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5634         },
5635         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5636           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5637           0, 0
5638         },
5639         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5640           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5641           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5642         },
5643         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5644           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5645           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5646         },
5647         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5648           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5649           0, 0
5650         },
5651         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5652           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5653           0, 0
5654         },
5655         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5656           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5657           0, 0
5658         },
5659         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5660           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5661           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5662         },
5663         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5664           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5665           0, 0
5666         },
5667         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5668           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5669           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5670         },
5671         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5672           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5673           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5674           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5675         },
5676         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5677           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5678           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5679           0, 0
5680         },
5681         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5682           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5683           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5684           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5685         },
5686         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5687           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5688           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5689           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5690         },
5691         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5692           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5693           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5694           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5695         },
5696         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5697           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5698           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5699           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5700         },
5701         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5702           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5703           0, 0
5704         },
5705         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5706           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5707           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5708         },
5709         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5710           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5711           0, 0
5712         },
5713         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5714           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5715           0, 0
5716         },
5717         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5718           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5719           0, 0
5720         },
5721         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5722           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5723           0, 0
5724         },
5725         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5726           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5727           0, 0
5728         },
5729         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5730           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5731           0, 0
5732         },
5733         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5734           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5735           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5736         },
5737         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5738           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5739           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5740         },
5741         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5742           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5743           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5744         },
5745         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5746           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5747           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5748         },
5749         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5750           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5751           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5752         },
5753         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5754           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5755           0, 0
5756         },
5757         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5758           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5759           0, 0
5760         },
5761         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5762           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5763           0, 0
5764         },
5765         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5766           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5767           0, 0
5768         },
5769         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5770           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5771           0, 0
5772         },
5773         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5774           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5775           0, 0
5776         },
5777         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5778           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5779           0, 0
5780         },
5781         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5782           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5783           0, 0
5784         },
5785         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5786           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5787           0, 0
5788         },
5789         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5790           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5791           0, 0
5792         },
5793         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5794           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5795           0, 0
5796         },
5797         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5798           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5799           0, 0
5800         },
5801         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5802           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5803           0, 0
5804         },
5805         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5806           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5807           0, 0
5808         },
5809         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5810           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5811           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5812         },
5813         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5814           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5815           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5816         },
5817         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5818           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5819           0, 0
5820         },
5821         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5822           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5823           0, 0
5824         },
5825         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5826           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5827           0, 0
5828         },
5829         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5830           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5831           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5832         },
5833         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5834           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5835           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5836         },
5837         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5838           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5839           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
5840         },
5841         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5842           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5843           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
5844         },
5845         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5846           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
5847           0, 0
5848         },
5849         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5850           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5851           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
5852         },
5853         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5854           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5855           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
5856         },
5857         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5858           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
5859           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
5860         },
5861         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5862           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5863           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
5864         },
5865         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5866           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5867           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
5868         },
5869         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5870           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5871           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
5872         },
5873         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5874           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5875           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
5876         },
5877         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5878           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5879           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
5880         },
5881         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5882           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5883           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
5884         },
5885         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5886           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5887           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
5888         },
5889         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5890           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5891           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
5892         },
5893         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5894           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5895           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
5896         },
5897         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5898           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5899           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
5900         },
5901         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5902           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5903           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
5904         },
5905         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5906           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5907           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
5908         },
5909         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5910           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5911           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
5912         },
5913         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5914           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5915           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
5916         },
5917         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5918           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5919           0, 0
5920         },
5921         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5922           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
5923           0, 0
5924         },
5925         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5926           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
5927           0, 0
5928         },
5929         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5930           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
5931           0, 0
5932         },
5933         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5934           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
5935           0, 0
5936         },
5937         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5938           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5939           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
5940         },
5941         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5942           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5943           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
5944         },
5945         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5946           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5947           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
5948         },
5949         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5950           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5951           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
5952         },
5953         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5954           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5955           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
5956         },
5957         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5958           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5959           0, 0
5960         },
5961         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5962           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
5963           0, 0
5964         },
5965         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5966           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
5967           0, 0
5968         },
5969         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5970           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
5971           0, 0
5972         },
5973         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5974           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
5975           0, 0
5976         },
5977         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5978           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5979           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
5980         },
5981         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5982           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5983           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
5984         },
5985         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5986           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5987           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
5988         },
5989         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5990           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5991           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
5992         },
5993         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5994           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5995           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
5996         },
5997         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5998           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
5999           0, 0
6000         },
6001         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6002           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6003           0, 0
6004         },
6005         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6006           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6007           0, 0
6008         },
6009         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6010           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6011           0, 0
6012         },
6013         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6014           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6015           0, 0
6016         },
6017         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6018           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6019           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6020         },
6021         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6022           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6023           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6024         },
6025         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6026           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6027           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6028         },
6029         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6030           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6031           0, 0
6032         },
6033         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6034           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6035           0, 0
6036         },
6037         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6038           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6039           0, 0
6040         },
6041         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6042           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6043           0, 0
6044         },
6045         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6046           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6047           0, 0
6048         },
6049         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6050           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6051           0, 0
6052         }
6053 };
6054
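/*
 * Inject a RAS error into the requested GFX sub-block through the PSP RAS TA.
 * The request is rejected unless both the hardware and the driver support the
 * requested error type for that sub-block (see the ras_gfx_subblocks table).
 */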
6055 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6056                                      void *inject_if)
6057 {
6058         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6059         int ret;
6060         struct ta_ras_trigger_error_input block_info = { 0 };
6061
6062         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6063                 return -EINVAL;
6064
6065         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6066                 return -EINVAL;
6067
6068         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6069                 return -EPERM;
6070
6071         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6072               info->head.type)) {
6073                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6074                         ras_gfx_subblocks[info->head.sub_block_index].name,
6075                         info->head.type);
6076                 return -EPERM;
6077         }
6078
6079         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6080               info->head.type)) {
6081                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6082                         ras_gfx_subblocks[info->head.sub_block_index].name,
6083                         info->head.type);
6084                 return -EPERM;
6085         }
6086
6087         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6088         block_info.sub_block_index =
6089                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6090         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6091         block_info.address = info->address;
6092         block_info.value = info->value;
6093
6094         mutex_lock(&adev->grbm_idx_mutex);
6095         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6096         mutex_unlock(&adev->grbm_idx_mutex);
6097
6098         return ret;
6099 }
6100
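/*
 * Instance names for the UTC memories below (VML2 bank caches, VML2 walker
 * memories, ATC L2 2M and 4K caches).  The array index matches the value
 * programmed into the corresponding *_ECC_INDEX / *_EDC_INDEX register.
 */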
6101 static const char *vml2_mems[] = {
6102         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6103         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6104         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6105         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6106         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6107         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6108         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6109         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6110         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6111         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6112         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6113         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6114         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6115         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6116         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6117         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6118 };
6119
6120 static const char *vml2_walker_mems[] = {
6121         "UTC_VML2_CACHE_PDE0_MEM0",
6122         "UTC_VML2_CACHE_PDE0_MEM1",
6123         "UTC_VML2_CACHE_PDE1_MEM0",
6124         "UTC_VML2_CACHE_PDE1_MEM1",
6125         "UTC_VML2_CACHE_PDE2_MEM0",
6126         "UTC_VML2_CACHE_PDE2_MEM1",
6127         "UTC_VML2_RDIF_LOG_FIFO",
6128 };
6129
6130 static const char *atc_l2_cache_2m_mems[] = {
6131         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6132         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6133         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6134         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6135 };
6136
6137 static const char *atc_l2_cache_4k_mems[] = {
6138         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6139         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6140         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6141         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6142         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6143         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6144         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6145         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6146         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6147         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6148         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6149         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6150         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6151         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6152         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6153         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6154         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6155         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6156         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6157         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6158         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6159         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6160         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6161         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6162         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6163         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6164         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6165         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6166         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6167         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6168         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6169         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6170 };
6171
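/*
 * Walk every UTC memory instance (VML2, VML2 walker, ATC L2 2M/4K), read its
 * ECC/EDC counter and add the SEC counts to err_data->ce_count and the DED
 * counts to err_data->ue_count.  The counters are zeroed up front and the
 * index registers are restored to 255 when done.
 */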
6172 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6173                                          struct ras_err_data *err_data)
6174 {
6175         uint32_t i, data;
6176         uint32_t sec_count, ded_count;
6177
6178         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6179         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6180         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6181         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6182         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6183         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6184         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6185         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6186
6187         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6188                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6189                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6190
6191                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6192                 if (sec_count) {
6193                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6194                                  vml2_mems[i], sec_count);
6195                         err_data->ce_count += sec_count;
6196                 }
6197
6198                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6199                 if (ded_count) {
6200                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6201                                  vml2_mems[i], ded_count);
6202                         err_data->ue_count += ded_count;
6203                 }
6204         }
6205
6206         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6207                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6208                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6209
6210                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6211                                                 SEC_COUNT);
6212                 if (sec_count) {
6213                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6214                                  vml2_walker_mems[i], sec_count);
6215                         err_data->ce_count += sec_count;
6216                 }
6217
6218                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6219                                                 DED_COUNT);
6220                 if (ded_count) {
6221                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6222                                  vml2_walker_mems[i], ded_count);
6223                         err_data->ue_count += ded_count;
6224                 }
6225         }
6226
6227         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6228                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6229                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6230
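		/* the SEC count sits in bits 14:13 of the 2M EDC_CNT value */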
6231                 sec_count = (data & 0x00006000L) >> 0xd;
6232                 if (sec_count) {
6233                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6234                                  atc_l2_cache_2m_mems[i], sec_count);
6235                         err_data->ce_count += sec_count;
6236                 }
6237         }
6238
6239         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6240                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6241                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6242
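		/* bits 14:13 hold the SEC count, bits 16:15 the DED count */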
6243                 sec_count = (data & 0x00006000L) >> 0xd;
6244                 if (sec_count) {
6245                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6246                                  atc_l2_cache_4k_mems[i], sec_count);
6247                         err_data->ce_count += sec_count;
6248                 }
6249
6250                 ded_count = (data & 0x00018000L) >> 0xf;
6251                 if (ded_count) {
6252                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6253                                  atc_l2_cache_4k_mems[i], ded_count);
6254                         err_data->ue_count += ded_count;
6255                 }
6256         }
6257
6258         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6259         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6260         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6261         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6262
6263         return 0;
6264 }
6265
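/*
 * Decode one raw EDC counter value: match the register against the
 * gfx_v9_0_ras_fields table, extract every SEC/DED field it contains and
 * accumulate the results into *sec_count and *ded_count.
 */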
6266 static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg,
6267         uint32_t se_id, uint32_t inst_id, uint32_t value,
6268         uint32_t *sec_count, uint32_t *ded_count)
6269 {
6270         uint32_t i;
6271         uint32_t sec_cnt, ded_cnt;
6272
6273         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6274                 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6275                         gfx_v9_0_ras_fields[i].seg != reg->seg ||
6276                         gfx_v9_0_ras_fields[i].inst != reg->inst)
6277                         continue;
6278
6279                 sec_cnt = (value &
6280                                 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6281                                 gfx_v9_0_ras_fields[i].sec_count_shift;
6282                 if (sec_cnt) {
6283                         DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
6284                                 gfx_v9_0_ras_fields[i].name,
6285                                 se_id, inst_id,
6286                                 sec_cnt);
6287                         *sec_count += sec_cnt;
6288                 }
6289
6290                 ded_cnt = (value &
6291                                 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6292                                 gfx_v9_0_ras_fields[i].ded_count_shift;
6293                 if (ded_cnt) {
6294                         DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
6295                                 gfx_v9_0_ras_fields[i].name,
6296                                 se_id, inst_id,
6297                                 ded_cnt);
6298                         *ded_count += ded_cnt;
6299                 }
6300         }
6301
6302         return 0;
6303 }
6304
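/*
 * Clear all GFX EDC error counters.  The per-SE/instance counters are
 * cleared by reading them back through GRBM_GFX_INDEX; the UTC counters are
 * cleared by zeroing and re-reading each memory instance.
 */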
6305 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6306 {
6307         int i, j, k;
6308
6309         /* read back registers to clear the counters */
6310         mutex_lock(&adev->grbm_idx_mutex);
6311         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6312                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6313                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6314                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6315                                 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6316                         }
6317                 }
6318         }
6319         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6320         mutex_unlock(&adev->grbm_idx_mutex);
6321
6322         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6323         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6324         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6325         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6326         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6327         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6328         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6329         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6330
6331         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6332                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6333                 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6334         }
6335
6336         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6337                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6338                 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6339         }
6340
6341         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6342                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6343                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6344         }
6345
6346         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6347                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6348                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6349         }
6350
6351         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6352         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6353         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6354         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6355 }
6356
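/*
 * RAS callback: scan every GFX EDC counter register across all shader
 * engines and instances, fold the decoded SEC counts into ce_count and the
 * DED counts into ue_count, then append the UTC (VML2/ATC L2) results.
 */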
6357 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6358                                           void *ras_error_status)
6359 {
6360         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6361         uint32_t sec_count = 0, ded_count = 0;
6362         uint32_t i, j, k;
6363         uint32_t reg_value;
6364
6365         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6366                 return -EINVAL;
6367
6368         err_data->ue_count = 0;
6369         err_data->ce_count = 0;
6370
6371         mutex_lock(&adev->grbm_idx_mutex);
6372
6373         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6374                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6375                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6376                                 gfx_v9_0_select_se_sh(adev, j, 0, k);
6377                                 reg_value =
6378                                         RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6379                                 if (reg_value)
6380                                         gfx_v9_0_ras_error_count(&gfx_v9_0_edc_counter_regs[i],
6381                                                         j, k, reg_value,
6382                                                         &sec_count, &ded_count);
6383                         }
6384                 }
6385         }
6386
6387         err_data->ce_count += sec_count;
6388         err_data->ue_count += ded_count;
6389
6390         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6391         mutex_unlock(&adev->grbm_idx_mutex);
6392
6393         gfx_v9_0_query_utc_edc_status(adev, err_data);
6394
6395         return 0;
6396 }
6397
6398 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6399         .name = "gfx_v9_0",
6400         .early_init = gfx_v9_0_early_init,
6401         .late_init = gfx_v9_0_late_init,
6402         .sw_init = gfx_v9_0_sw_init,
6403         .sw_fini = gfx_v9_0_sw_fini,
6404         .hw_init = gfx_v9_0_hw_init,
6405         .hw_fini = gfx_v9_0_hw_fini,
6406         .suspend = gfx_v9_0_suspend,
6407         .resume = gfx_v9_0_resume,
6408         .is_idle = gfx_v9_0_is_idle,
6409         .wait_for_idle = gfx_v9_0_wait_for_idle,
6410         .soft_reset = gfx_v9_0_soft_reset,
6411         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6412         .set_powergating_state = gfx_v9_0_set_powergating_state,
6413         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6414 };
6415
6416 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6417         .type = AMDGPU_RING_TYPE_GFX,
6418         .align_mask = 0xff,
6419         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6420         .support_64bit_ptrs = true,
6421         .vmhub = AMDGPU_GFXHUB_0,
6422         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6423         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6424         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6425         .emit_frame_size = /* 242 maximum in total if 16 IBs */
6426                 5 +  /* COND_EXEC */
6427                 7 +  /* PIPELINE_SYNC */
6428                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6429                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6430                 2 + /* VM_FLUSH */
6431                 8 +  /* FENCE for VM_FLUSH */
6432                 20 + /* GDS switch */
6433                 4 + /* double SWITCH_BUFFER,
6434                        the first COND_EXEC jumps to the place just
6435                        prior to this double SWITCH_BUFFER */
6436                 5 + /* COND_EXEC */
6437                 7 +  /* HDP_flush */
6438                 4 +  /* VGT_flush */
6439                 14 + /* CE_META */
6440                 31 + /* DE_META */
6441                 3 + /* CNTX_CTRL */
6442                 5 + /* HDP_INVL */
6443                 8 + 8 + /* FENCE x2 */
6444                 2, /* SWITCH_BUFFER */
6445         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6446         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6447         .emit_fence = gfx_v9_0_ring_emit_fence,
6448         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6449         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6450         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6451         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6452         .test_ring = gfx_v9_0_ring_test_ring,
6453         .test_ib = gfx_v9_0_ring_test_ib,
6454         .insert_nop = amdgpu_ring_insert_nop,
6455         .pad_ib = amdgpu_ring_generic_pad_ib,
6456         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6457         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6458         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6459         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6460         .emit_tmz = gfx_v9_0_ring_emit_tmz,
6461         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6462         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6463         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6464         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6465 };
6466
6467 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6468         .type = AMDGPU_RING_TYPE_COMPUTE,
6469         .align_mask = 0xff,
6470         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6471         .support_64bit_ptrs = true,
6472         .vmhub = AMDGPU_GFXHUB_0,
6473         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6474         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6475         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6476         .emit_frame_size =
6477                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6478                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6479                 5 + /* hdp invalidate */
6480                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6481                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6482                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6483                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6484                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6485         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6486         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6487         .emit_fence = gfx_v9_0_ring_emit_fence,
6488         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6489         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6490         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6491         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6492         .test_ring = gfx_v9_0_ring_test_ring,
6493         .test_ib = gfx_v9_0_ring_test_ib,
6494         .insert_nop = amdgpu_ring_insert_nop,
6495         .pad_ib = amdgpu_ring_generic_pad_ib,
6496         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6497         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6498         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6499 };
6500
6501 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6502         .type = AMDGPU_RING_TYPE_KIQ,
6503         .align_mask = 0xff,
6504         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6505         .support_64bit_ptrs = true,
6506         .vmhub = AMDGPU_GFXHUB_0,
6507         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6508         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6509         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6510         .emit_frame_size =
6511                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6512                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6513                 5 + /* hdp invalidate */
6514                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6515                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6516                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6517                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6518                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6519         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6520         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6521         .test_ring = gfx_v9_0_ring_test_ring,
6522         .insert_nop = amdgpu_ring_insert_nop,
6523         .pad_ib = amdgpu_ring_generic_pad_ib,
6524         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6525         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6526         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6527         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6528 };
6529
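/* Attach the KIQ, GFX and compute ring function tables to their rings. */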
6530 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6531 {
6532         int i;
6533
6534         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6535
6536         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6537                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6538
6539         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6540                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6541 }
6542
6543 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6544         .set = gfx_v9_0_set_eop_interrupt_state,
6545         .process = gfx_v9_0_eop_irq,
6546 };
6547
6548 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6549         .set = gfx_v9_0_set_priv_reg_fault_state,
6550         .process = gfx_v9_0_priv_reg_irq,
6551 };
6552
6553 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6554         .set = gfx_v9_0_set_priv_inst_fault_state,
6555         .process = gfx_v9_0_priv_inst_irq,
6556 };
6557
6558 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6559         .set = gfx_v9_0_set_cp_ecc_error_state,
6560         .process = amdgpu_gfx_cp_ecc_error_irq,
6561 };
6562
6563
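/*
 * Hook up the interrupt sources handled by this block: end-of-pipe,
 * privileged register/instruction faults and CP ECC errors.
 */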
6564 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6565 {
6566         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6567         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6568
6569         adev->gfx.priv_reg_irq.num_types = 1;
6570         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6571
6572         adev->gfx.priv_inst_irq.num_types = 1;
6573         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6574
6575         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6576         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6577 }
6578
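/* Select the RLC function table; all listed gfx9 ASICs use gfx_v9_0_rlc_funcs. */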
6579 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6580 {
6581         switch (adev->asic_type) {
6582         case CHIP_VEGA10:
6583         case CHIP_VEGA12:
6584         case CHIP_VEGA20:
6585         case CHIP_RAVEN:
6586         case CHIP_ARCTURUS:
6587         case CHIP_RENOIR:
6588                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6589                 break;
6590         default:
6591                 break;
6592         }
6593 }
6594
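/*
 * Set the per-ASIC GDS, GWS and OA sizes and the maximum GDS compute wave
 * id.
 */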
6595 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6596 {
6597         /* init asic gds info */
6598         switch (adev->asic_type) {
6599         case CHIP_VEGA10:
6600         case CHIP_VEGA12:
6601         case CHIP_VEGA20:
6602                 adev->gds.gds_size = 0x10000;
6603                 break;
6604         case CHIP_RAVEN:
6605         case CHIP_ARCTURUS:
6606                 adev->gds.gds_size = 0x1000;
6607                 break;
6608         default:
6609                 adev->gds.gds_size = 0x10000;
6610                 break;
6611         }
6612
6613         switch (adev->asic_type) {
6614         case CHIP_VEGA10:
6615         case CHIP_VEGA20:
6616                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6617                 break;
6618         case CHIP_VEGA12:
6619                 adev->gds.gds_compute_max_wave_id = 0x27f;
6620                 break;
6621         case CHIP_RAVEN:
6622                 if (adev->rev_id >= 0x8)
6623                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6624                 else
6625                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6626                 break;
6627         case CHIP_ARCTURUS:
6628                 adev->gds.gds_compute_max_wave_id = 0xfff;
6629                 break;
6630         default:
6631                 /* this really depends on the chip */
6632                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6633                 break;
6634         }
6635
6636         adev->gds.gws_size = 64;
6637         adev->gds.oa_size = 16;
6638 }
6639
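/*
 * Mark user-disabled CUs inactive by writing the bitmap into
 * GC_USER_SHADER_ARRAY_CONFIG for the SE/SH currently selected via GRBM.
 */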
6640 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6641                                                  u32 bitmap)
6642 {
6643         u32 data;
6644
6645         if (!bitmap)
6646                 return;
6647
6648         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6649         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6650
6651         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6652 }
6653
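/* Return a bitmap of the active CUs in the currently selected SE/SH. */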
6654 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6655 {
6656         u32 data, mask;
6657
6658         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6659         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6660
6661         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6662         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6663
6664         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6665
6666         return (~data) & mask;
6667 }
6668
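/*
 * Fill cu_info with the per-SE/SH active-CU bitmaps, the number of active
 * CUs and the always-on CU mask, honouring any user-disabled CUs.
 */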
6669 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6670                                  struct amdgpu_cu_info *cu_info)
6671 {
6672         int i, j, k, counter, active_cu_number = 0;
6673         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6674         unsigned disable_masks[4 * 4];
6675
6676         if (!adev || !cu_info)
6677                 return -EINVAL;
6678
6679         /*
6680          * 16 comes from the bitmap array size 4*4, which covers all gfx9 ASICs
6681          */
6682         if (adev->gfx.config.max_shader_engines *
6683                 adev->gfx.config.max_sh_per_se > 16)
6684                 return -EINVAL;
6685
6686         amdgpu_gfx_parse_disable_cu(disable_masks,
6687                                     adev->gfx.config.max_shader_engines,
6688                                     adev->gfx.config.max_sh_per_se);
6689
6690         mutex_lock(&adev->grbm_idx_mutex);
6691         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6692                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6693                         mask = 1;
6694                         ao_bitmap = 0;
6695                         counter = 0;
6696                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6697                         gfx_v9_0_set_user_cu_inactive_bitmap(
6698                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6699                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6700
6701                         /*
6702                          * The bitmap (and ao_cu_bitmap) in the cu_info structure is
6703                          * a 4x4 array, which suits the Vega ASICs with their
6704                          * 4*2 SE/SH layout.
6705                          * For Arcturus, however, the SE/SH layout changes to 8*1.
6706                          * To minimize the impact, we keep it compatible with the
6707                          * current bitmap array as below:
6708                          *    SE4,SH0 --> bitmap[0][1]
6709                          *    SE5,SH0 --> bitmap[1][1]
6710                          *    SE6,SH0 --> bitmap[2][1]
6711                          *    SE7,SH0 --> bitmap[3][1]
6712                          */
6713                         cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6714
6715                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6716                                 if (bitmap & mask) {
6717                                         if (counter < adev->gfx.config.max_cu_per_sh)
6718                                                 ao_bitmap |= mask;
6719                                         counter++;
6720                                 }
6721                                 mask <<= 1;
6722                         }
6723                         active_cu_number += counter;
6724                         if (i < 2 && j < 2)
6725                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6726                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6727                 }
6728         }
6729         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6730         mutex_unlock(&adev->grbm_idx_mutex);
6731
6732         cu_info->number = active_cu_number;
6733         cu_info->ao_cu_mask = ao_cu_mask;
6734         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6735
6736         return 0;
6737 }
6738
6739 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6740 {
6741         .type = AMD_IP_BLOCK_TYPE_GFX,
6742         .major = 9,
6743         .minor = 0,
6744         .rev = 0,
6745         .funcs = &gfx_v9_0_ip_funcs,
6746 };