drm/amdgpu: add xgmi perfmons for arcturus
[linux-2.6-microblaze.git] / drivers / gpu / drm / amd / amdgpu / gfx_v9_0.c
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
42
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48
49 #include "amdgpu_ras.h"
50
51 #include "gfx_v9_4.h"
52 #include "gfx_v9_0.h"
53
54 #include "asic_reg/pwr/pwr_10_0_offset.h"
55 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
56
57 #define GFX9_NUM_GFX_RINGS     1
58 #define GFX9_MEC_HPD_SIZE 4096
59 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
60 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
61
62 #define mmGCEA_PROBE_MAP                        0x070c
63 #define mmGCEA_PROBE_MAP_BASE_IDX               0
64
65 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
66 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
71
72 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
73 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
78
79 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
80 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
85
86 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
87 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
88 MODULE_FIRMWARE("amdgpu/raven_me.bin");
89 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
91 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
92
93 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
94 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
100
101 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
102 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
107 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
108
109 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
110 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
114 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
119
120 #define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
121 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
122 #define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
123 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
124 #define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
125 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
126 #define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
127 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
128 #define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
129 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
130 #define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
131 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0
132
133 enum ta_ras_gfx_subblock {
134         /*CPC*/
135         TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
136         TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
137         TA_RAS_BLOCK__GFX_CPC_UCODE,
138         TA_RAS_BLOCK__GFX_DC_STATE_ME1,
139         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
140         TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
141         TA_RAS_BLOCK__GFX_DC_STATE_ME2,
142         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
143         TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
144         TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
145         /* CPF*/
146         TA_RAS_BLOCK__GFX_CPF_INDEX_START,
147         TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
148         TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
149         TA_RAS_BLOCK__GFX_CPF_TAG,
150         TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
151         /* CPG*/
152         TA_RAS_BLOCK__GFX_CPG_INDEX_START,
153         TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
154         TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
155         TA_RAS_BLOCK__GFX_CPG_TAG,
156         TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
157         /* GDS*/
158         TA_RAS_BLOCK__GFX_GDS_INDEX_START,
159         TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
160         TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
161         TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
162         TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
163         TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
164         TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
165         /* SPI*/
166         TA_RAS_BLOCK__GFX_SPI_SR_MEM,
167         /* SQ*/
168         TA_RAS_BLOCK__GFX_SQ_INDEX_START,
169         TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
170         TA_RAS_BLOCK__GFX_SQ_LDS_D,
171         TA_RAS_BLOCK__GFX_SQ_LDS_I,
172         TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
173         TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
174         /* SQC (3 ranges)*/
175         TA_RAS_BLOCK__GFX_SQC_INDEX_START,
176         /* SQC range 0*/
177         TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
178         TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
179                 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
180         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
181         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
182         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
183         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
184         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
185         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
186         TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
187                 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
188         /* SQC range 1*/
189         TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
190         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
191                 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
192         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
193         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
194         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
195         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
196         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
197         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
198         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
199         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
200         TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
201                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
202         /* SQC range 2*/
203         TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
204         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
205                 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
206         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
207         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
208         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
209         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
210         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
211         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
212         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
213         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
214         TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
215                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
216         TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
217         /* TA*/
218         TA_RAS_BLOCK__GFX_TA_INDEX_START,
219         TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
220         TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
221         TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
222         TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
223         TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
224         TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
225         /* TCA*/
226         TA_RAS_BLOCK__GFX_TCA_INDEX_START,
227         TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
228         TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
229         TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
230         /* TCC (5 sub-ranges)*/
231         TA_RAS_BLOCK__GFX_TCC_INDEX_START,
232         /* TCC range 0*/
233         TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
234         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
235         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
236         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
237         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
238         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
239         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
240         TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
241         TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
242         TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
243         /* TCC range 1*/
244         TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
245         TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
246         TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
247         TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
248                 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
249         /* TCC range 2*/
250         TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
251         TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
252         TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
253         TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
254         TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
255         TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
256         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
257         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
258         TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
259         TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
260                 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
261         /* TCC range 3*/
262         TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
263         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
264         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
265         TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
266                 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
267         /* TCC range 4*/
268         TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
269         TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
270                 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
271         TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
272         TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
273                 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
274         TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
275         /* TCI*/
276         TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
277         /* TCP*/
278         TA_RAS_BLOCK__GFX_TCP_INDEX_START,
279         TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
280         TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
281         TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
282         TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
283         TA_RAS_BLOCK__GFX_TCP_DB_RAM,
284         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
285         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
286         TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
287         /* TD*/
288         TA_RAS_BLOCK__GFX_TD_INDEX_START,
289         TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
290         TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
291         TA_RAS_BLOCK__GFX_TD_CS_FIFO,
292         TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
293         /* EA (3 sub-ranges)*/
294         TA_RAS_BLOCK__GFX_EA_INDEX_START,
295         /* EA range 0*/
296         TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
297         TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
298         TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
299         TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
300         TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
301         TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
302         TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
303         TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
304         TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
305         TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
306         /* EA range 1*/
307         TA_RAS_BLOCK__GFX_EA_INDEX1_START,
308         TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
309         TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
310         TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
311         TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
312         TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
313         TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
314         TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
315         TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
316         /* EA range 2*/
317         TA_RAS_BLOCK__GFX_EA_INDEX2_START,
318         TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
319         TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
320         TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
321         TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
322         TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
323         TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
324         /* UTC VM L2 bank*/
325         TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
326         /* UTC VM walker*/
327         TA_RAS_BLOCK__UTC_VML2_WALKER,
328         /* UTC ATC L2 2MB cache*/
329         TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
330         /* UTC ATC L2 4KB cache*/
331         TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
332         TA_RAS_BLOCK__GFX_MAX
333 };
334
335 struct ras_gfx_subblock {
336         unsigned char *name;
337         int ta_subblock;
338         int hw_supported_error_type;
339         int sw_supported_error_type;
340 };
341
342 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
343         [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
344                 #subblock,                                                     \
345                 TA_RAS_BLOCK__##subblock,                                      \
346                 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
347                 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
348         }
349
350 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
351         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
352         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
353         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
354         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
355         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
356         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
357         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
358         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
359         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
360         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
361         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
362         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
363         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
364         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
365         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
366         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
367         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
368                              0),
369         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
370                              0),
371         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
372         AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
373         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
374         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
375         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
376         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
377         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
378         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
379                              0, 0),
380         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
381                              0),
382         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
383                              0, 0),
384         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
385                              0),
386         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
387                              0, 0),
388         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
389                              0),
390         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
391                              1),
392         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
393                              0, 0, 0),
394         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
395                              0),
396         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
397                              0),
398         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
399                              0),
400         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
401                              0),
402         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
403                              0),
404         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
405                              0, 0),
406         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
407                              0),
408         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
409                              0),
410         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
411                              0, 0, 0),
412         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
413                              0),
414         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
415                              0),
416         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
417                              0),
418         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
419                              0),
420         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
421                              0),
422         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
423                              0, 0),
424         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
425                              0),
426         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
427         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
428         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
429         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
430         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
431         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
432         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
433         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
434         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
435                              1),
436         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
437                              1),
438         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
439                              1),
440         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
441                              0),
442         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
443                              0),
444         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
445         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
446         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
447         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
448         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
449         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
450         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
451         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
452         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
453         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
454         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
455         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
456                              0),
457         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
458         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
459                              0),
460         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
461                              0, 0),
462         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
463                              0),
464         AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
465         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
466         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
467         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
468         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
469         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
470         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
471         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
472         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
473         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
474         AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
475         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
476         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
477         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
478         AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
479         AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
480         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
481         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
482         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
483         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
484         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
485         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
486         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
487         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
488         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
489         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
490         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
491         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
492         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
493         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
494         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
495         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
496         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
497         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
498 };
499
500 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
501 {
502         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
503         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
504         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
505         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
506         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
507         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
508         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
509         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
510         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
511         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
512         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
513         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
514         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
515         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
516         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
517         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
518         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
519         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
520         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
521         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
522 };
523
524 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
525 {
526         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
527         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
528         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
529         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
530         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
531         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
532         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
533         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
534         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
535         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
536         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
537         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
538         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
539         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
540         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
541         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
542         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
543         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
544 };
545
546 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
547 {
548         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
549         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
550         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
551         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
552         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
553         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
554         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
555         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
556         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
557         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
558         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
559 };
560
561 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
562 {
563         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
564         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
565         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
566         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
567         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
568         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
569         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
570         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
571         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
572         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
573         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
574         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
575         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
576         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
577         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
578         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
579         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
580         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
581         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
582         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
583         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
584         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
585         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
586         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
587 };
588
589 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
590 {
591         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
592         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
593         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
594         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
595         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
596         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
597         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
598 };
599
600 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
601 {
602         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
603         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
604         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
605         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
606         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
607         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
608         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
609         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
610         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
611         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
612         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
613         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
614         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
615         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
616         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
617         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
618         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
619         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
620         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
621 };
622
623 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
624 {
625         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
626         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
627         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
628         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
629         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
630         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
631         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
632         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
633         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
634         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
635         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
636         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
637 };
638
639 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
640 {
641         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
642         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
643         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
644 };
645
646 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
647 {
648         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
649         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
650         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
651         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
652         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
653         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
654         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
655         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
656         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
657         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
658         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
659         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
660         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
661         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
662         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
663         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
664 };
665
666 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
667 {
668         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
669         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
670         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
671         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
672         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
673         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
674         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
675         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
676         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
677         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
678         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
679         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
680         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
681 };
682
683 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
684 {
685         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
686         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
687         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
688         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
689         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
690         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
691         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
692         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
693         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
694         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
695         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
696 };
697
698 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
699         {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
700         {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
701 };
702
703 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
704 {
705         mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
706         mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
707         mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
708         mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
709         mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
710         mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
711         mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
712         mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
713 };
714
715 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
716 {
717         mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
718         mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
719         mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
720         mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
721         mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
722         mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
723         mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
724         mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
725 };
726
727 static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
728 {
729         static void *scratch_reg0;
730         static void *scratch_reg1;
731         static void *scratch_reg2;
732         static void *scratch_reg3;
733         static void *spare_int;
734         static uint32_t grbm_cntl;
735         static uint32_t grbm_idx;
736
737         scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
738         scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
739         scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
740         scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
741         spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
742
743         grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
744         grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
745
746         if (amdgpu_sriov_runtime(adev)) {
747                 pr_err("shouldn't call rlcg write register during runtime\n");
748                 return;
749         }
750
751         if (offset == grbm_cntl || offset == grbm_idx) {
752                 if (offset  == grbm_cntl)
753                         writel(v, scratch_reg2);
754                 else if (offset == grbm_idx)
755                         writel(v, scratch_reg3);
756
757                 writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
758         } else {
759                 uint32_t i = 0;
760                 uint32_t retries = 50000;
761
762                 writel(v, scratch_reg0);
763                 writel(offset | 0x80000000, scratch_reg1);
764                 writel(1, spare_int);
765                 for (i = 0; i < retries; i++) {
766                         u32 tmp;
767
768                         tmp = readl(scratch_reg1);
769                         if (!(tmp & 0x80000000))
770                                 break;
771
772                         udelay(10);
773                 }
774                 if (i >= retries)
775                         pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
776         }
777
778 }
779
780 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
781 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
782 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
783 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
784
785 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
786 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
787 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
788 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
789 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
790                                  struct amdgpu_cu_info *cu_info);
791 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
792 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
793 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
794 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
795                                           void *ras_error_status);
796 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
797                                      void *inject_if);
798 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
799
800 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
801                                 uint64_t queue_mask)
802 {
803         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
804         amdgpu_ring_write(kiq_ring,
805                 PACKET3_SET_RESOURCES_VMID_MASK(0) |
806                 /* vmid_mask:0* queue_type:0 (KIQ) */
807                 PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
808         amdgpu_ring_write(kiq_ring,
809                         lower_32_bits(queue_mask));     /* queue mask lo */
810         amdgpu_ring_write(kiq_ring,
811                         upper_32_bits(queue_mask));     /* queue mask hi */
812         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
813         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
814         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
815         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
816 }
817
818 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
819                                  struct amdgpu_ring *ring)
820 {
821         struct amdgpu_device *adev = kiq_ring->adev;
822         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
823         uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
824         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
825
826         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
827         /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
828         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
829                          PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
830                          PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
831                          PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
832                          PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
833                          PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
834                          /*queue_type: normal compute queue */
835                          PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
836                          /* alloc format: all_on_one_pipe */
837                          PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
838                          PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
839                          /* num_queues: must be 1 */
840                          PACKET3_MAP_QUEUES_NUM_QUEUES(1));
841         amdgpu_ring_write(kiq_ring,
842                         PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
843         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
844         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
845         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
846         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
847 }
848
849 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
850                                    struct amdgpu_ring *ring,
851                                    enum amdgpu_unmap_queues_action action,
852                                    u64 gpu_addr, u64 seq)
853 {
854         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
855
856         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
857         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
858                           PACKET3_UNMAP_QUEUES_ACTION(action) |
859                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
860                           PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
861                           PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
862         amdgpu_ring_write(kiq_ring,
863                         PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
864
865         if (action == PREEMPT_QUEUES_NO_UNMAP) {
866                 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
867                 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
868                 amdgpu_ring_write(kiq_ring, seq);
869         } else {
870                 amdgpu_ring_write(kiq_ring, 0);
871                 amdgpu_ring_write(kiq_ring, 0);
872                 amdgpu_ring_write(kiq_ring, 0);
873         }
874 }
875
876 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
877                                    struct amdgpu_ring *ring,
878                                    u64 addr,
879                                    u64 seq)
880 {
881         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
882
883         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
884         amdgpu_ring_write(kiq_ring,
885                           PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
886                           PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
887                           PACKET3_QUERY_STATUS_COMMAND(2));
888         /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
889         amdgpu_ring_write(kiq_ring,
890                         PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
891                         PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
892         amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
893         amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
894         amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
895         amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
896 }
897
898 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
899                                 uint16_t pasid, uint32_t flush_type,
900                                 bool all_hub)
901 {
902         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
903         amdgpu_ring_write(kiq_ring,
904                         PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
905                         PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
906                         PACKET3_INVALIDATE_TLBS_PASID(pasid) |
907                         PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
908 }
909
910 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
911         .kiq_set_resources = gfx_v9_0_kiq_set_resources,
912         .kiq_map_queues = gfx_v9_0_kiq_map_queues,
913         .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
914         .kiq_query_status = gfx_v9_0_kiq_query_status,
915         .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
916         .set_resources_size = 8,
917         .map_queues_size = 7,
918         .unmap_queues_size = 6,
919         .query_status_size = 7,
920         .invalidate_tlbs_size = 2,
921 };
922
923 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
924 {
925         adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
926 }
927
928 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
929 {
930         switch (adev->asic_type) {
931         case CHIP_VEGA10:
932                 soc15_program_register_sequence(adev,
933                                                 golden_settings_gc_9_0,
934                                                 ARRAY_SIZE(golden_settings_gc_9_0));
935                 soc15_program_register_sequence(adev,
936                                                 golden_settings_gc_9_0_vg10,
937                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
938                 break;
939         case CHIP_VEGA12:
940                 soc15_program_register_sequence(adev,
941                                                 golden_settings_gc_9_2_1,
942                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
943                 soc15_program_register_sequence(adev,
944                                                 golden_settings_gc_9_2_1_vg12,
945                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
946                 break;
947         case CHIP_VEGA20:
948                 soc15_program_register_sequence(adev,
949                                                 golden_settings_gc_9_0,
950                                                 ARRAY_SIZE(golden_settings_gc_9_0));
951                 soc15_program_register_sequence(adev,
952                                                 golden_settings_gc_9_0_vg20,
953                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
954                 break;
955         case CHIP_ARCTURUS:
956                 soc15_program_register_sequence(adev,
957                                                 golden_settings_gc_9_4_1_arct,
958                                                 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
959                 break;
960         case CHIP_RAVEN:
961                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
962                                                 ARRAY_SIZE(golden_settings_gc_9_1));
963                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
964                         soc15_program_register_sequence(adev,
965                                                         golden_settings_gc_9_1_rv2,
966                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
967                 else
968                         soc15_program_register_sequence(adev,
969                                                         golden_settings_gc_9_1_rv1,
970                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
971                 break;
972          case CHIP_RENOIR:
973                 soc15_program_register_sequence(adev,
974                                                 golden_settings_gc_9_1_rn,
975                                                 ARRAY_SIZE(golden_settings_gc_9_1_rn));
976                 return; /* for renoir, don't need common goldensetting */
977         default:
978                 break;
979         }
980
981         if (adev->asic_type != CHIP_ARCTURUS)
982                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
983                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
984 }
985
986 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
987 {
988         adev->gfx.scratch.num_reg = 8;
989         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
990         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
991 }
992
993 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
994                                        bool wc, uint32_t reg, uint32_t val)
995 {
996         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
997         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
998                                 WRITE_DATA_DST_SEL(0) |
999                                 (wc ? WR_CONFIRM : 0));
1000         amdgpu_ring_write(ring, reg);
1001         amdgpu_ring_write(ring, 0);
1002         amdgpu_ring_write(ring, val);
1003 }
1004
1005 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1006                                   int mem_space, int opt, uint32_t addr0,
1007                                   uint32_t addr1, uint32_t ref, uint32_t mask,
1008                                   uint32_t inv)
1009 {
1010         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1011         amdgpu_ring_write(ring,
1012                                  /* memory (1) or register (0) */
1013                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1014                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
1015                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1016                                  WAIT_REG_MEM_ENGINE(eng_sel)));
1017
1018         if (mem_space)
1019                 BUG_ON(addr0 & 0x3); /* Dword align */
1020         amdgpu_ring_write(ring, addr0);
1021         amdgpu_ring_write(ring, addr1);
1022         amdgpu_ring_write(ring, ref);
1023         amdgpu_ring_write(ring, mask);
1024         amdgpu_ring_write(ring, inv); /* poll interval */
1025 }
1026
1027 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1028 {
1029         struct amdgpu_device *adev = ring->adev;
1030         uint32_t scratch;
1031         uint32_t tmp = 0;
1032         unsigned i;
1033         int r;
1034
1035         r = amdgpu_gfx_scratch_get(adev, &scratch);
1036         if (r)
1037                 return r;
1038
1039         WREG32(scratch, 0xCAFEDEAD);
1040         r = amdgpu_ring_alloc(ring, 3);
1041         if (r)
1042                 goto error_free_scratch;
1043
1044         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1045         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1046         amdgpu_ring_write(ring, 0xDEADBEEF);
1047         amdgpu_ring_commit(ring);
1048
1049         for (i = 0; i < adev->usec_timeout; i++) {
1050                 tmp = RREG32(scratch);
1051                 if (tmp == 0xDEADBEEF)
1052                         break;
1053                 udelay(1);
1054         }
1055
1056         if (i >= adev->usec_timeout)
1057                 r = -ETIMEDOUT;
1058
1059 error_free_scratch:
1060         amdgpu_gfx_scratch_free(adev, scratch);
1061         return r;
1062 }
1063
1064 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1065 {
1066         struct amdgpu_device *adev = ring->adev;
1067         struct amdgpu_ib ib;
1068         struct dma_fence *f = NULL;
1069
1070         unsigned index;
1071         uint64_t gpu_addr;
1072         uint32_t tmp;
1073         long r;
1074
1075         r = amdgpu_device_wb_get(adev, &index);
1076         if (r)
1077                 return r;
1078
1079         gpu_addr = adev->wb.gpu_addr + (index * 4);
1080         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1081         memset(&ib, 0, sizeof(ib));
1082         r = amdgpu_ib_get(adev, NULL, 16,
1083                                         AMDGPU_IB_POOL_DIRECT, &ib);
1084         if (r)
1085                 goto err1;
1086
1087         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1088         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1089         ib.ptr[2] = lower_32_bits(gpu_addr);
1090         ib.ptr[3] = upper_32_bits(gpu_addr);
1091         ib.ptr[4] = 0xDEADBEEF;
1092         ib.length_dw = 5;
1093
1094         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1095         if (r)
1096                 goto err2;
1097
1098         r = dma_fence_wait_timeout(f, false, timeout);
1099         if (r == 0) {
1100                 r = -ETIMEDOUT;
1101                 goto err2;
1102         } else if (r < 0) {
1103                 goto err2;
1104         }
1105
1106         tmp = adev->wb.wb[index];
1107         if (tmp == 0xDEADBEEF)
1108                 r = 0;
1109         else
1110                 r = -EINVAL;
1111
1112 err2:
1113         amdgpu_ib_free(adev, &ib, NULL);
1114         dma_fence_put(f);
1115 err1:
1116         amdgpu_device_wb_free(adev, index);
1117         return r;
1118 }
1119
1120
1121 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1122 {
1123         release_firmware(adev->gfx.pfp_fw);
1124         adev->gfx.pfp_fw = NULL;
1125         release_firmware(adev->gfx.me_fw);
1126         adev->gfx.me_fw = NULL;
1127         release_firmware(adev->gfx.ce_fw);
1128         adev->gfx.ce_fw = NULL;
1129         release_firmware(adev->gfx.rlc_fw);
1130         adev->gfx.rlc_fw = NULL;
1131         release_firmware(adev->gfx.mec_fw);
1132         adev->gfx.mec_fw = NULL;
1133         release_firmware(adev->gfx.mec2_fw);
1134         adev->gfx.mec2_fw = NULL;
1135
1136         kfree(adev->gfx.rlc.register_list_format);
1137 }
1138
1139 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1140 {
1141         const struct rlc_firmware_header_v2_1 *rlc_hdr;
1142
1143         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1144         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1145         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1146         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1147         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1148         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1149         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1150         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1151         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1152         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1153         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1154         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1155         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1156         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1157                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1158 }
1159
1160 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1161 {
1162         adev->gfx.me_fw_write_wait = false;
1163         adev->gfx.mec_fw_write_wait = false;
1164
1165         if ((adev->asic_type != CHIP_ARCTURUS) &&
1166             ((adev->gfx.mec_fw_version < 0x000001a5) ||
1167             (adev->gfx.mec_feature_version < 46) ||
1168             (adev->gfx.pfp_fw_version < 0x000000b7) ||
1169             (adev->gfx.pfp_feature_version < 46)))
1170                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1171
1172         switch (adev->asic_type) {
1173         case CHIP_VEGA10:
1174                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1175                     (adev->gfx.me_feature_version >= 42) &&
1176                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1177                     (adev->gfx.pfp_feature_version >= 42))
1178                         adev->gfx.me_fw_write_wait = true;
1179
1180                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1181                     (adev->gfx.mec_feature_version >= 42))
1182                         adev->gfx.mec_fw_write_wait = true;
1183                 break;
1184         case CHIP_VEGA12:
1185                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1186                     (adev->gfx.me_feature_version >= 44) &&
1187                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1188                     (adev->gfx.pfp_feature_version >= 44))
1189                         adev->gfx.me_fw_write_wait = true;
1190
1191                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1192                     (adev->gfx.mec_feature_version >= 44))
1193                         adev->gfx.mec_fw_write_wait = true;
1194                 break;
1195         case CHIP_VEGA20:
1196                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1197                     (adev->gfx.me_feature_version >= 44) &&
1198                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1199                     (adev->gfx.pfp_feature_version >= 44))
1200                         adev->gfx.me_fw_write_wait = true;
1201
1202                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1203                     (adev->gfx.mec_feature_version >= 44))
1204                         adev->gfx.mec_fw_write_wait = true;
1205                 break;
1206         case CHIP_RAVEN:
1207                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1208                     (adev->gfx.me_feature_version >= 42) &&
1209                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1210                     (adev->gfx.pfp_feature_version >= 42))
1211                         adev->gfx.me_fw_write_wait = true;
1212
1213                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1214                     (adev->gfx.mec_feature_version >= 42))
1215                         adev->gfx.mec_fw_write_wait = true;
1216                 break;
1217         default:
1218                 adev->gfx.me_fw_write_wait = true;
1219                 adev->gfx.mec_fw_write_wait = true;
1220                 break;
1221         }
1222 }
1223
1224 struct amdgpu_gfxoff_quirk {
1225         u16 chip_vendor;
1226         u16 chip_device;
1227         u16 subsys_vendor;
1228         u16 subsys_device;
1229         u8 revision;
1230 };
1231
1232 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1233         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1234         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1235         /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1236         { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1237         /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1238         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1239         { 0, 0, 0, 0, 0 },
1240 };
1241
1242 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1243 {
1244         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1245
1246         while (p && p->chip_device != 0) {
1247                 if (pdev->vendor == p->chip_vendor &&
1248                     pdev->device == p->chip_device &&
1249                     pdev->subsystem_vendor == p->subsys_vendor &&
1250                     pdev->subsystem_device == p->subsys_device &&
1251                     pdev->revision == p->revision) {
1252                         return true;
1253                 }
1254                 ++p;
1255         }
1256         return false;
1257 }
1258
1259 static bool is_raven_kicker(struct amdgpu_device *adev)
1260 {
1261         if (adev->pm.fw_version >= 0x41e2b)
1262                 return true;
1263         else
1264                 return false;
1265 }
1266
1267 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1268 {
1269         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1270                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1271
1272         switch (adev->asic_type) {
1273         case CHIP_VEGA10:
1274         case CHIP_VEGA12:
1275         case CHIP_VEGA20:
1276                 break;
1277         case CHIP_RAVEN:
1278                 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1279                       (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1280                     ((!is_raven_kicker(adev) &&
1281                       adev->gfx.rlc_fw_version < 531) ||
1282                      (adev->gfx.rlc_feature_version < 1) ||
1283                      !adev->gfx.rlc.is_rlc_v2_1))
1284                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1285
1286                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1287                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1288                                 AMD_PG_SUPPORT_CP |
1289                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1290                 break;
1291         case CHIP_RENOIR:
1292                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1293                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1294                                 AMD_PG_SUPPORT_CP |
1295                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1296                 break;
1297         default:
1298                 break;
1299         }
1300 }
1301
1302 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1303                                           const char *chip_name)
1304 {
1305         char fw_name[30];
1306         int err;
1307         struct amdgpu_firmware_info *info = NULL;
1308         const struct common_firmware_header *header = NULL;
1309         const struct gfx_firmware_header_v1_0 *cp_hdr;
1310
1311         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1312         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1313         if (err)
1314                 goto out;
1315         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1316         if (err)
1317                 goto out;
1318         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1319         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1320         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1321
1322         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1323         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1324         if (err)
1325                 goto out;
1326         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1327         if (err)
1328                 goto out;
1329         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1330         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1331         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1332
1333         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1334         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1335         if (err)
1336                 goto out;
1337         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1338         if (err)
1339                 goto out;
1340         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1341         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1342         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1343
1344         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1345                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1346                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1347                 info->fw = adev->gfx.pfp_fw;
1348                 header = (const struct common_firmware_header *)info->fw->data;
1349                 adev->firmware.fw_size +=
1350                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1351
1352                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1353                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1354                 info->fw = adev->gfx.me_fw;
1355                 header = (const struct common_firmware_header *)info->fw->data;
1356                 adev->firmware.fw_size +=
1357                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1358
1359                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1360                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1361                 info->fw = adev->gfx.ce_fw;
1362                 header = (const struct common_firmware_header *)info->fw->data;
1363                 adev->firmware.fw_size +=
1364                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1365         }
1366
1367 out:
1368         if (err) {
1369                 dev_err(adev->dev,
1370                         "gfx9: Failed to load firmware \"%s\"\n",
1371                         fw_name);
1372                 release_firmware(adev->gfx.pfp_fw);
1373                 adev->gfx.pfp_fw = NULL;
1374                 release_firmware(adev->gfx.me_fw);
1375                 adev->gfx.me_fw = NULL;
1376                 release_firmware(adev->gfx.ce_fw);
1377                 adev->gfx.ce_fw = NULL;
1378         }
1379         return err;
1380 }
1381
1382 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1383                                           const char *chip_name)
1384 {
1385         char fw_name[30];
1386         int err;
1387         struct amdgpu_firmware_info *info = NULL;
1388         const struct common_firmware_header *header = NULL;
1389         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1390         unsigned int *tmp = NULL;
1391         unsigned int i = 0;
1392         uint16_t version_major;
1393         uint16_t version_minor;
1394         uint32_t smu_version;
1395
1396         /*
1397          * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1398          * instead of picasso_rlc.bin.
1399          * Judgment method:
1400          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1401          *          or revision >= 0xD8 && revision <= 0xDF
1402          * otherwise is PCO FP5
1403          */
1404         if (!strcmp(chip_name, "picasso") &&
1405                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1406                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1407                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1408         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1409                 (smu_version >= 0x41e2b))
1410                 /**
1411                 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1412                 */
1413                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1414         else
1415                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1416         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1417         if (err)
1418                 goto out;
1419         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1420         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1421
1422         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1423         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1424         if (version_major == 2 && version_minor == 1)
1425                 adev->gfx.rlc.is_rlc_v2_1 = true;
1426
1427         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1428         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1429         adev->gfx.rlc.save_and_restore_offset =
1430                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1431         adev->gfx.rlc.clear_state_descriptor_offset =
1432                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1433         adev->gfx.rlc.avail_scratch_ram_locations =
1434                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1435         adev->gfx.rlc.reg_restore_list_size =
1436                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1437         adev->gfx.rlc.reg_list_format_start =
1438                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1439         adev->gfx.rlc.reg_list_format_separate_start =
1440                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1441         adev->gfx.rlc.starting_offsets_start =
1442                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1443         adev->gfx.rlc.reg_list_format_size_bytes =
1444                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1445         adev->gfx.rlc.reg_list_size_bytes =
1446                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1447         adev->gfx.rlc.register_list_format =
1448                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1449                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1450         if (!adev->gfx.rlc.register_list_format) {
1451                 err = -ENOMEM;
1452                 goto out;
1453         }
1454
1455         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1456                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1457         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1458                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1459
1460         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1461
1462         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1463                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1464         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1465                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1466
1467         if (adev->gfx.rlc.is_rlc_v2_1)
1468                 gfx_v9_0_init_rlc_ext_microcode(adev);
1469
1470         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1471                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1472                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1473                 info->fw = adev->gfx.rlc_fw;
1474                 header = (const struct common_firmware_header *)info->fw->data;
1475                 adev->firmware.fw_size +=
1476                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1477
1478                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1479                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1480                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1481                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1482                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1483                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1484                         info->fw = adev->gfx.rlc_fw;
1485                         adev->firmware.fw_size +=
1486                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1487
1488                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1489                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1490                         info->fw = adev->gfx.rlc_fw;
1491                         adev->firmware.fw_size +=
1492                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1493
1494                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1495                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1496                         info->fw = adev->gfx.rlc_fw;
1497                         adev->firmware.fw_size +=
1498                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1499                 }
1500         }
1501
1502 out:
1503         if (err) {
1504                 dev_err(adev->dev,
1505                         "gfx9: Failed to load firmware \"%s\"\n",
1506                         fw_name);
1507                 release_firmware(adev->gfx.rlc_fw);
1508                 adev->gfx.rlc_fw = NULL;
1509         }
1510         return err;
1511 }
1512
1513 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1514                                           const char *chip_name)
1515 {
1516         char fw_name[30];
1517         int err;
1518         struct amdgpu_firmware_info *info = NULL;
1519         const struct common_firmware_header *header = NULL;
1520         const struct gfx_firmware_header_v1_0 *cp_hdr;
1521
1522         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1523         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1524         if (err)
1525                 goto out;
1526         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1527         if (err)
1528                 goto out;
1529         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1530         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1531         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1532
1533
1534         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1535         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1536         if (!err) {
1537                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1538                 if (err)
1539                         goto out;
1540                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1541                 adev->gfx.mec2_fw->data;
1542                 adev->gfx.mec2_fw_version =
1543                 le32_to_cpu(cp_hdr->header.ucode_version);
1544                 adev->gfx.mec2_feature_version =
1545                 le32_to_cpu(cp_hdr->ucode_feature_version);
1546         } else {
1547                 err = 0;
1548                 adev->gfx.mec2_fw = NULL;
1549         }
1550
1551         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1552                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1553                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1554                 info->fw = adev->gfx.mec_fw;
1555                 header = (const struct common_firmware_header *)info->fw->data;
1556                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1557                 adev->firmware.fw_size +=
1558                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1559
1560                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1561                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1562                 info->fw = adev->gfx.mec_fw;
1563                 adev->firmware.fw_size +=
1564                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1565
1566                 if (adev->gfx.mec2_fw) {
1567                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1568                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1569                         info->fw = adev->gfx.mec2_fw;
1570                         header = (const struct common_firmware_header *)info->fw->data;
1571                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1572                         adev->firmware.fw_size +=
1573                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1574
1575                         /* TODO: Determine if MEC2 JT FW loading can be removed
1576                                  for all GFX V9 asic and above */
1577                         if (adev->asic_type != CHIP_ARCTURUS &&
1578                             adev->asic_type != CHIP_RENOIR) {
1579                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1580                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1581                                 info->fw = adev->gfx.mec2_fw;
1582                                 adev->firmware.fw_size +=
1583                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1584                                         PAGE_SIZE);
1585                         }
1586                 }
1587         }
1588
1589 out:
1590         gfx_v9_0_check_if_need_gfxoff(adev);
1591         gfx_v9_0_check_fw_write_wait(adev);
1592         if (err) {
1593                 dev_err(adev->dev,
1594                         "gfx9: Failed to load firmware \"%s\"\n",
1595                         fw_name);
1596                 release_firmware(adev->gfx.mec_fw);
1597                 adev->gfx.mec_fw = NULL;
1598                 release_firmware(adev->gfx.mec2_fw);
1599                 adev->gfx.mec2_fw = NULL;
1600         }
1601         return err;
1602 }
1603
1604 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1605 {
1606         const char *chip_name;
1607         int r;
1608
1609         DRM_DEBUG("\n");
1610
1611         switch (adev->asic_type) {
1612         case CHIP_VEGA10:
1613                 chip_name = "vega10";
1614                 break;
1615         case CHIP_VEGA12:
1616                 chip_name = "vega12";
1617                 break;
1618         case CHIP_VEGA20:
1619                 chip_name = "vega20";
1620                 break;
1621         case CHIP_RAVEN:
1622                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1623                         chip_name = "raven2";
1624                 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1625                         chip_name = "picasso";
1626                 else
1627                         chip_name = "raven";
1628                 break;
1629         case CHIP_ARCTURUS:
1630                 chip_name = "arcturus";
1631                 break;
1632         case CHIP_RENOIR:
1633                 chip_name = "renoir";
1634                 break;
1635         default:
1636                 BUG();
1637         }
1638
1639         /* No CPG in Arcturus */
1640         if (adev->asic_type != CHIP_ARCTURUS) {
1641                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1642                 if (r)
1643                         return r;
1644         }
1645
1646         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1647         if (r)
1648                 return r;
1649
1650         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1651         if (r)
1652                 return r;
1653
1654         return r;
1655 }
1656
1657 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1658 {
1659         u32 count = 0;
1660         const struct cs_section_def *sect = NULL;
1661         const struct cs_extent_def *ext = NULL;
1662
1663         /* begin clear state */
1664         count += 2;
1665         /* context control state */
1666         count += 3;
1667
1668         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1669                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1670                         if (sect->id == SECT_CONTEXT)
1671                                 count += 2 + ext->reg_count;
1672                         else
1673                                 return 0;
1674                 }
1675         }
1676
1677         /* end clear state */
1678         count += 2;
1679         /* clear state */
1680         count += 2;
1681
1682         return count;
1683 }
1684
1685 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1686                                     volatile u32 *buffer)
1687 {
1688         u32 count = 0, i;
1689         const struct cs_section_def *sect = NULL;
1690         const struct cs_extent_def *ext = NULL;
1691
1692         if (adev->gfx.rlc.cs_data == NULL)
1693                 return;
1694         if (buffer == NULL)
1695                 return;
1696
1697         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1698         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1699
1700         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1701         buffer[count++] = cpu_to_le32(0x80000000);
1702         buffer[count++] = cpu_to_le32(0x80000000);
1703
1704         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1705                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1706                         if (sect->id == SECT_CONTEXT) {
1707                                 buffer[count++] =
1708                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1709                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1710                                                 PACKET3_SET_CONTEXT_REG_START);
1711                                 for (i = 0; i < ext->reg_count; i++)
1712                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1713                         } else {
1714                                 return;
1715                         }
1716                 }
1717         }
1718
1719         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1720         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1721
1722         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1723         buffer[count++] = cpu_to_le32(0);
1724 }
1725
1726 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1727 {
1728         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1729         uint32_t pg_always_on_cu_num = 2;
1730         uint32_t always_on_cu_num;
1731         uint32_t i, j, k;
1732         uint32_t mask, cu_bitmap, counter;
1733
1734         if (adev->flags & AMD_IS_APU)
1735                 always_on_cu_num = 4;
1736         else if (adev->asic_type == CHIP_VEGA12)
1737                 always_on_cu_num = 8;
1738         else
1739                 always_on_cu_num = 12;
1740
1741         mutex_lock(&adev->grbm_idx_mutex);
1742         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1743                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1744                         mask = 1;
1745                         cu_bitmap = 0;
1746                         counter = 0;
1747                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1748
1749                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1750                                 if (cu_info->bitmap[i][j] & mask) {
1751                                         if (counter == pg_always_on_cu_num)
1752                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1753                                         if (counter < always_on_cu_num)
1754                                                 cu_bitmap |= mask;
1755                                         else
1756                                                 break;
1757                                         counter++;
1758                                 }
1759                                 mask <<= 1;
1760                         }
1761
1762                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1763                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1764                 }
1765         }
1766         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1767         mutex_unlock(&adev->grbm_idx_mutex);
1768 }
1769
1770 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1771 {
1772         uint32_t data;
1773
1774         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1775         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1776         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1777         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1778         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1779
1780         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1781         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1782
1783         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1784         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1785
1786         mutex_lock(&adev->grbm_idx_mutex);
1787         /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1788         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1789         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1790
1791         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1792         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1793         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1794         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1795         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1796
1797         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1798         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1799         data &= 0x0000FFFF;
1800         data |= 0x00C00000;
1801         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1802
1803         /*
1804          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1805          * programmed in gfx_v9_0_init_always_on_cu_mask()
1806          */
1807
1808         /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1809          * but used for RLC_LB_CNTL configuration */
1810         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1811         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1812         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1813         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1814         mutex_unlock(&adev->grbm_idx_mutex);
1815
1816         gfx_v9_0_init_always_on_cu_mask(adev);
1817 }
1818
1819 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1820 {
1821         uint32_t data;
1822
1823         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1824         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1825         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1826         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1827         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1828
1829         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1830         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1831
1832         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1833         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1834
1835         mutex_lock(&adev->grbm_idx_mutex);
1836         /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1837         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1838         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1839
1840         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1841         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1842         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1843         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1844         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1845
1846         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1847         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1848         data &= 0x0000FFFF;
1849         data |= 0x00C00000;
1850         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1851
1852         /*
1853          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1854          * programmed in gfx_v9_0_init_always_on_cu_mask()
1855          */
1856
1857         /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1858          * but used for RLC_LB_CNTL configuration */
1859         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1860         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1861         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1862         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1863         mutex_unlock(&adev->grbm_idx_mutex);
1864
1865         gfx_v9_0_init_always_on_cu_mask(adev);
1866 }
1867
1868 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1869 {
1870         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1871 }
1872
1873 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1874 {
1875         return 5;
1876 }
1877
1878 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1879 {
1880         const struct cs_section_def *cs_data;
1881         int r;
1882
1883         adev->gfx.rlc.cs_data = gfx9_cs_data;
1884
1885         cs_data = adev->gfx.rlc.cs_data;
1886
1887         if (cs_data) {
1888                 /* init clear state block */
1889                 r = amdgpu_gfx_rlc_init_csb(adev);
1890                 if (r)
1891                         return r;
1892         }
1893
1894         if (adev->flags & AMD_IS_APU) {
1895                 /* TODO: double check the cp_table_size for RV */
1896                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1897                 r = amdgpu_gfx_rlc_init_cpt(adev);
1898                 if (r)
1899                         return r;
1900         }
1901
1902         switch (adev->asic_type) {
1903         case CHIP_RAVEN:
1904                 gfx_v9_0_init_lbpw(adev);
1905                 break;
1906         case CHIP_VEGA20:
1907                 gfx_v9_4_init_lbpw(adev);
1908                 break;
1909         default:
1910                 break;
1911         }
1912
1913         /* init spm vmid with 0xf */
1914         if (adev->gfx.rlc.funcs->update_spm_vmid)
1915                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1916
1917         return 0;
1918 }
1919
1920 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1921 {
1922         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1923         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1924 }
1925
1926 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1927 {
1928         int r;
1929         u32 *hpd;
1930         const __le32 *fw_data;
1931         unsigned fw_size;
1932         u32 *fw;
1933         size_t mec_hpd_size;
1934
1935         const struct gfx_firmware_header_v1_0 *mec_hdr;
1936
1937         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1938
1939         /* take ownership of the relevant compute queues */
1940         amdgpu_gfx_compute_queue_acquire(adev);
1941         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1942         if (mec_hpd_size) {
1943                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1944                                               AMDGPU_GEM_DOMAIN_VRAM,
1945                                               &adev->gfx.mec.hpd_eop_obj,
1946                                               &adev->gfx.mec.hpd_eop_gpu_addr,
1947                                               (void **)&hpd);
1948                 if (r) {
1949                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1950                         gfx_v9_0_mec_fini(adev);
1951                         return r;
1952                 }
1953
1954                 memset(hpd, 0, mec_hpd_size);
1955
1956                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1957                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1958         }
1959
1960         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1961
1962         fw_data = (const __le32 *)
1963                 (adev->gfx.mec_fw->data +
1964                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1965         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1966
1967         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1968                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1969                                       &adev->gfx.mec.mec_fw_obj,
1970                                       &adev->gfx.mec.mec_fw_gpu_addr,
1971                                       (void **)&fw);
1972         if (r) {
1973                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1974                 gfx_v9_0_mec_fini(adev);
1975                 return r;
1976         }
1977
1978         memcpy(fw, fw_data, fw_size);
1979
1980         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1981         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1982
1983         return 0;
1984 }
1985
1986 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1987 {
1988         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1989                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1990                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1991                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1992                 (SQ_IND_INDEX__FORCE_READ_MASK));
1993         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1994 }
1995
1996 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1997                            uint32_t wave, uint32_t thread,
1998                            uint32_t regno, uint32_t num, uint32_t *out)
1999 {
2000         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2001                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2002                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2003                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
2004                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2005                 (SQ_IND_INDEX__FORCE_READ_MASK) |
2006                 (SQ_IND_INDEX__AUTO_INCR_MASK));
2007         while (num--)
2008                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2009 }
2010
2011 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2012 {
2013         /* type 1 wave data */
2014         dst[(*no_fields)++] = 1;
2015         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2016         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2017         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2018         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2019         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2020         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2021         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2022         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2023         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2024         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2025         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2026         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2027         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2028         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2029 }
2030
2031 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2032                                      uint32_t wave, uint32_t start,
2033                                      uint32_t size, uint32_t *dst)
2034 {
2035         wave_read_regs(
2036                 adev, simd, wave, 0,
2037                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2038 }
2039
2040 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2041                                      uint32_t wave, uint32_t thread,
2042                                      uint32_t start, uint32_t size,
2043                                      uint32_t *dst)
2044 {
2045         wave_read_regs(
2046                 adev, simd, wave, thread,
2047                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2048 }
2049
2050 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2051                                   u32 me, u32 pipe, u32 q, u32 vm)
2052 {
2053         soc15_grbm_select(adev, me, pipe, q, vm);
2054 }
2055
2056 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2057         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2058         .select_se_sh = &gfx_v9_0_select_se_sh,
2059         .read_wave_data = &gfx_v9_0_read_wave_data,
2060         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2061         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2062         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2063         .ras_error_inject = &gfx_v9_0_ras_error_inject,
2064         .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2065         .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2066 };
2067
2068 static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
2069         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2070         .select_se_sh = &gfx_v9_0_select_se_sh,
2071         .read_wave_data = &gfx_v9_0_read_wave_data,
2072         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2073         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2074         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2075         .ras_error_inject = &gfx_v9_4_ras_error_inject,
2076         .query_ras_error_count = &gfx_v9_4_query_ras_error_count,
2077         .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
2078         .query_ras_error_status = &gfx_v9_4_query_ras_error_status,
2079 };
2080
2081 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2082 {
2083         u32 gb_addr_config;
2084         int err;
2085
2086         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2087
2088         switch (adev->asic_type) {
2089         case CHIP_VEGA10:
2090                 adev->gfx.config.max_hw_contexts = 8;
2091                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2092                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2093                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2094                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2095                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2096                 break;
2097         case CHIP_VEGA12:
2098                 adev->gfx.config.max_hw_contexts = 8;
2099                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2100                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2101                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2102                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2103                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2104                 DRM_INFO("fix gfx.config for vega12\n");
2105                 break;
2106         case CHIP_VEGA20:
2107                 adev->gfx.config.max_hw_contexts = 8;
2108                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2109                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2110                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2111                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2112                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2113                 gb_addr_config &= ~0xf3e777ff;
2114                 gb_addr_config |= 0x22014042;
2115                 /* check vbios table if gpu info is not available */
2116                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2117                 if (err)
2118                         return err;
2119                 break;
2120         case CHIP_RAVEN:
2121                 adev->gfx.config.max_hw_contexts = 8;
2122                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2123                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2124                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2125                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2126                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2127                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2128                 else
2129                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2130                 break;
2131         case CHIP_ARCTURUS:
2132                 adev->gfx.funcs = &gfx_v9_4_gfx_funcs;
2133                 adev->gfx.config.max_hw_contexts = 8;
2134                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2135                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2136                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2137                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2138                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2139                 gb_addr_config &= ~0xf3e777ff;
2140                 gb_addr_config |= 0x22014042;
2141                 break;
2142         case CHIP_RENOIR:
2143                 adev->gfx.config.max_hw_contexts = 8;
2144                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2145                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2146                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2147                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2148                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2149                 gb_addr_config &= ~0xf3e777ff;
2150                 gb_addr_config |= 0x22010042;
2151                 break;
2152         default:
2153                 BUG();
2154                 break;
2155         }
2156
2157         adev->gfx.config.gb_addr_config = gb_addr_config;
2158
2159         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2160                         REG_GET_FIELD(
2161                                         adev->gfx.config.gb_addr_config,
2162                                         GB_ADDR_CONFIG,
2163                                         NUM_PIPES);
2164
2165         adev->gfx.config.max_tile_pipes =
2166                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2167
2168         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2169                         REG_GET_FIELD(
2170                                         adev->gfx.config.gb_addr_config,
2171                                         GB_ADDR_CONFIG,
2172                                         NUM_BANKS);
2173         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2174                         REG_GET_FIELD(
2175                                         adev->gfx.config.gb_addr_config,
2176                                         GB_ADDR_CONFIG,
2177                                         MAX_COMPRESSED_FRAGS);
2178         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2179                         REG_GET_FIELD(
2180                                         adev->gfx.config.gb_addr_config,
2181                                         GB_ADDR_CONFIG,
2182                                         NUM_RB_PER_SE);
2183         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2184                         REG_GET_FIELD(
2185                                         adev->gfx.config.gb_addr_config,
2186                                         GB_ADDR_CONFIG,
2187                                         NUM_SHADER_ENGINES);
2188         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2189                         REG_GET_FIELD(
2190                                         adev->gfx.config.gb_addr_config,
2191                                         GB_ADDR_CONFIG,
2192                                         PIPE_INTERLEAVE_SIZE));
2193
2194         return 0;
2195 }
2196
2197 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2198                                       int mec, int pipe, int queue)
2199 {
2200         unsigned irq_type;
2201         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2202         unsigned int hw_prio;
2203
2204         ring = &adev->gfx.compute_ring[ring_id];
2205
2206         /* mec0 is me1 */
2207         ring->me = mec + 1;
2208         ring->pipe = pipe;
2209         ring->queue = queue;
2210
2211         ring->ring_obj = NULL;
2212         ring->use_doorbell = true;
2213         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2214         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2215                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2216         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2217
2218         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2219                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2220                 + ring->pipe;
2221         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ?
2222                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2223         /* type-2 packets are deprecated on MEC, use type-3 instead */
2224         return amdgpu_ring_init(adev, ring, 1024,
2225                                 &adev->gfx.eop_irq, irq_type, hw_prio);
2226 }
2227
2228 static int gfx_v9_0_sw_init(void *handle)
2229 {
2230         int i, j, k, r, ring_id;
2231         struct amdgpu_ring *ring;
2232         struct amdgpu_kiq *kiq;
2233         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2234
2235         switch (adev->asic_type) {
2236         case CHIP_VEGA10:
2237         case CHIP_VEGA12:
2238         case CHIP_VEGA20:
2239         case CHIP_RAVEN:
2240         case CHIP_ARCTURUS:
2241         case CHIP_RENOIR:
2242                 adev->gfx.mec.num_mec = 2;
2243                 break;
2244         default:
2245                 adev->gfx.mec.num_mec = 1;
2246                 break;
2247         }
2248
2249         adev->gfx.mec.num_pipe_per_mec = 4;
2250         adev->gfx.mec.num_queue_per_pipe = 8;
2251
2252         /* EOP Event */
2253         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2254         if (r)
2255                 return r;
2256
2257         /* Privileged reg */
2258         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2259                               &adev->gfx.priv_reg_irq);
2260         if (r)
2261                 return r;
2262
2263         /* Privileged inst */
2264         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2265                               &adev->gfx.priv_inst_irq);
2266         if (r)
2267                 return r;
2268
2269         /* ECC error */
2270         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2271                               &adev->gfx.cp_ecc_error_irq);
2272         if (r)
2273                 return r;
2274
2275         /* FUE error */
2276         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2277                               &adev->gfx.cp_ecc_error_irq);
2278         if (r)
2279                 return r;
2280
2281         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2282
2283         gfx_v9_0_scratch_init(adev);
2284
2285         r = gfx_v9_0_init_microcode(adev);
2286         if (r) {
2287                 DRM_ERROR("Failed to load gfx firmware!\n");
2288                 return r;
2289         }
2290
2291         r = adev->gfx.rlc.funcs->init(adev);
2292         if (r) {
2293                 DRM_ERROR("Failed to init rlc BOs!\n");
2294                 return r;
2295         }
2296
2297         r = gfx_v9_0_mec_init(adev);
2298         if (r) {
2299                 DRM_ERROR("Failed to init MEC BOs!\n");
2300                 return r;
2301         }
2302
2303         /* set up the gfx ring */
2304         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2305                 ring = &adev->gfx.gfx_ring[i];
2306                 ring->ring_obj = NULL;
2307                 if (!i)
2308                         sprintf(ring->name, "gfx");
2309                 else
2310                         sprintf(ring->name, "gfx_%d", i);
2311                 ring->use_doorbell = true;
2312                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2313                 r = amdgpu_ring_init(adev, ring, 1024,
2314                                      &adev->gfx.eop_irq,
2315                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2316                                      AMDGPU_RING_PRIO_DEFAULT);
2317                 if (r)
2318                         return r;
2319         }
2320
2321         /* set up the compute queues - allocate horizontally across pipes */
2322         ring_id = 0;
2323         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2324                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2325                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2326                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2327                                         continue;
2328
2329                                 r = gfx_v9_0_compute_ring_init(adev,
2330                                                                ring_id,
2331                                                                i, k, j);
2332                                 if (r)
2333                                         return r;
2334
2335                                 ring_id++;
2336                         }
2337                 }
2338         }
2339
2340         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2341         if (r) {
2342                 DRM_ERROR("Failed to init KIQ BOs!\n");
2343                 return r;
2344         }
2345
2346         kiq = &adev->gfx.kiq;
2347         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2348         if (r)
2349                 return r;
2350
2351         /* create MQD for all compute queues as wel as KIQ for SRIOV case */
2352         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2353         if (r)
2354                 return r;
2355
2356         adev->gfx.ce_ram_size = 0x8000;
2357
2358         r = gfx_v9_0_gpu_early_init(adev);
2359         if (r)
2360                 return r;
2361
2362         return 0;
2363 }
2364
2365
2366 static int gfx_v9_0_sw_fini(void *handle)
2367 {
2368         int i;
2369         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2370
2371         amdgpu_gfx_ras_fini(adev);
2372
2373         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2374                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2375         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2376                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2377
2378         amdgpu_gfx_mqd_sw_fini(adev);
2379         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2380         amdgpu_gfx_kiq_fini(adev);
2381
2382         gfx_v9_0_mec_fini(adev);
2383         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2384         if (adev->flags & AMD_IS_APU) {
2385                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2386                                 &adev->gfx.rlc.cp_table_gpu_addr,
2387                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2388         }
2389         gfx_v9_0_free_microcode(adev);
2390
2391         return 0;
2392 }
2393
2394
2395 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2396 {
2397         /* TODO */
2398 }
2399
2400 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2401                            u32 instance)
2402 {
2403         u32 data;
2404
2405         if (instance == 0xffffffff)
2406                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2407         else
2408                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2409
2410         if (se_num == 0xffffffff)
2411                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2412         else
2413                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2414
2415         if (sh_num == 0xffffffff)
2416                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2417         else
2418                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2419
2420         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2421 }
2422
2423 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2424 {
2425         u32 data, mask;
2426
2427         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2428         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2429
2430         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2431         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2432
2433         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2434                                          adev->gfx.config.max_sh_per_se);
2435
2436         return (~data) & mask;
2437 }
2438
2439 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2440 {
2441         int i, j;
2442         u32 data;
2443         u32 active_rbs = 0;
2444         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2445                                         adev->gfx.config.max_sh_per_se;
2446
2447         mutex_lock(&adev->grbm_idx_mutex);
2448         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2449                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2450                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2451                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2452                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2453                                                rb_bitmap_width_per_sh);
2454                 }
2455         }
2456         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2457         mutex_unlock(&adev->grbm_idx_mutex);
2458
2459         adev->gfx.config.backend_enable_mask = active_rbs;
2460         adev->gfx.config.num_rbs = hweight32(active_rbs);
2461 }
2462
2463 #define DEFAULT_SH_MEM_BASES    (0x6000)
2464 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2465 {
2466         int i;
2467         uint32_t sh_mem_config;
2468         uint32_t sh_mem_bases;
2469
2470         /*
2471          * Configure apertures:
2472          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2473          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2474          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2475          */
2476         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2477
2478         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2479                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2480                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2481
2482         mutex_lock(&adev->srbm_mutex);
2483         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2484                 soc15_grbm_select(adev, 0, 0, 0, i);
2485                 /* CP and shaders */
2486                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2487                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2488         }
2489         soc15_grbm_select(adev, 0, 0, 0, 0);
2490         mutex_unlock(&adev->srbm_mutex);
2491
2492         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2493            acccess. These should be enabled by FW for target VMIDs. */
2494         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2495                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2496                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2497                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2498                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2499         }
2500 }
2501
2502 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2503 {
2504         int vmid;
2505
2506         /*
2507          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2508          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2509          * the driver can enable them for graphics. VMID0 should maintain
2510          * access so that HWS firmware can save/restore entries.
2511          */
2512         for (vmid = 1; vmid < 16; vmid++) {
2513                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2514                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2515                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2516                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2517         }
2518 }
2519
2520 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2521 {
2522         uint32_t tmp;
2523
2524         switch (adev->asic_type) {
2525         case CHIP_ARCTURUS:
2526                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2527                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2528                                         DISABLE_BARRIER_WAITCNT, 1);
2529                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2530                 break;
2531         default:
2532                 break;
2533         }
2534 }
2535
2536 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2537 {
2538         u32 tmp;
2539         int i;
2540
2541         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2542
2543         gfx_v9_0_tiling_mode_table_init(adev);
2544
2545         gfx_v9_0_setup_rb(adev);
2546         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2547         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2548
2549         /* XXX SH_MEM regs */
2550         /* where to put LDS, scratch, GPUVM in FSA64 space */
2551         mutex_lock(&adev->srbm_mutex);
2552         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2553                 soc15_grbm_select(adev, 0, 0, 0, i);
2554                 /* CP and shaders */
2555                 if (i == 0) {
2556                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2557                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2558                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2559                                             !!adev->gmc.noretry);
2560                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2561                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2562                 } else {
2563                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2564                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2565                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2566                                             !!adev->gmc.noretry);
2567                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2568                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2569                                 (adev->gmc.private_aperture_start >> 48));
2570                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2571                                 (adev->gmc.shared_aperture_start >> 48));
2572                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2573                 }
2574         }
2575         soc15_grbm_select(adev, 0, 0, 0, 0);
2576
2577         mutex_unlock(&adev->srbm_mutex);
2578
2579         gfx_v9_0_init_compute_vmid(adev);
2580         gfx_v9_0_init_gds_vmid(adev);
2581         gfx_v9_0_init_sq_config(adev);
2582 }
2583
2584 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2585 {
2586         u32 i, j, k;
2587         u32 mask;
2588
2589         mutex_lock(&adev->grbm_idx_mutex);
2590         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2591                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2592                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2593                         for (k = 0; k < adev->usec_timeout; k++) {
2594                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2595                                         break;
2596                                 udelay(1);
2597                         }
2598                         if (k == adev->usec_timeout) {
2599                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2600                                                       0xffffffff, 0xffffffff);
2601                                 mutex_unlock(&adev->grbm_idx_mutex);
2602                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2603                                          i, j);
2604                                 return;
2605                         }
2606                 }
2607         }
2608         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2609         mutex_unlock(&adev->grbm_idx_mutex);
2610
2611         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2612                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2613                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2614                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2615         for (k = 0; k < adev->usec_timeout; k++) {
2616                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2617                         break;
2618                 udelay(1);
2619         }
2620 }
2621
2622 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2623                                                bool enable)
2624 {
2625         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2626
2627         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2628         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2629         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2630         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2631
2632         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2633 }
2634
2635 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2636 {
2637         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2638         /* csib */
2639         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2640                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2641         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2642                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2643         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2644                         adev->gfx.rlc.clear_state_size);
2645 }
2646
2647 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2648                                 int indirect_offset,
2649                                 int list_size,
2650                                 int *unique_indirect_regs,
2651                                 int unique_indirect_reg_count,
2652                                 int *indirect_start_offsets,
2653                                 int *indirect_start_offsets_count,
2654                                 int max_start_offsets_count)
2655 {
2656         int idx;
2657
2658         for (; indirect_offset < list_size; indirect_offset++) {
2659                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2660                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2661                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2662
2663                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2664                         indirect_offset += 2;
2665
2666                         /* look for the matching indice */
2667                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2668                                 if (unique_indirect_regs[idx] ==
2669                                         register_list_format[indirect_offset] ||
2670                                         !unique_indirect_regs[idx])
2671                                         break;
2672                         }
2673
2674                         BUG_ON(idx >= unique_indirect_reg_count);
2675
2676                         if (!unique_indirect_regs[idx])
2677                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2678
2679                         indirect_offset++;
2680                 }
2681         }
2682 }
2683
2684 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2685 {
2686         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2687         int unique_indirect_reg_count = 0;
2688
2689         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2690         int indirect_start_offsets_count = 0;
2691
2692         int list_size = 0;
2693         int i = 0, j = 0;
2694         u32 tmp = 0;
2695
2696         u32 *register_list_format =
2697                 kmemdup(adev->gfx.rlc.register_list_format,
2698                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2699         if (!register_list_format)
2700                 return -ENOMEM;
2701
2702         /* setup unique_indirect_regs array and indirect_start_offsets array */
2703         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2704         gfx_v9_1_parse_ind_reg_list(register_list_format,
2705                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2706                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2707                                     unique_indirect_regs,
2708                                     unique_indirect_reg_count,
2709                                     indirect_start_offsets,
2710                                     &indirect_start_offsets_count,
2711                                     ARRAY_SIZE(indirect_start_offsets));
2712
2713         /* enable auto inc in case it is disabled */
2714         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2715         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2716         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2717
2718         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2719         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2720                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2721         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2722                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2723                         adev->gfx.rlc.register_restore[i]);
2724
2725         /* load indirect register */
2726         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2727                 adev->gfx.rlc.reg_list_format_start);
2728
2729         /* direct register portion */
2730         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2731                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2732                         register_list_format[i]);
2733
2734         /* indirect register portion */
2735         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2736                 if (register_list_format[i] == 0xFFFFFFFF) {
2737                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2738                         continue;
2739                 }
2740
2741                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2742                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2743
2744                 for (j = 0; j < unique_indirect_reg_count; j++) {
2745                         if (register_list_format[i] == unique_indirect_regs[j]) {
2746                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2747                                 break;
2748                         }
2749                 }
2750
2751                 BUG_ON(j >= unique_indirect_reg_count);
2752
2753                 i++;
2754         }
2755
2756         /* set save/restore list size */
2757         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2758         list_size = list_size >> 1;
2759         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2760                 adev->gfx.rlc.reg_restore_list_size);
2761         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2762
2763         /* write the starting offsets to RLC scratch ram */
2764         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2765                 adev->gfx.rlc.starting_offsets_start);
2766         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2767                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2768                        indirect_start_offsets[i]);
2769
2770         /* load unique indirect regs*/
2771         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2772                 if (unique_indirect_regs[i] != 0) {
2773                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2774                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2775                                unique_indirect_regs[i] & 0x3FFFF);
2776
2777                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2778                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2779                                unique_indirect_regs[i] >> 20);
2780                 }
2781         }
2782
2783         kfree(register_list_format);
2784         return 0;
2785 }
2786
2787 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2788 {
2789         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2790 }
2791
2792 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2793                                              bool enable)
2794 {
2795         uint32_t data = 0;
2796         uint32_t default_data = 0;
2797
2798         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2799         if (enable) {
2800                 /* enable GFXIP control over CGPG */
2801                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2802                 if(default_data != data)
2803                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2804
2805                 /* update status */
2806                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2807                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2808                 if(default_data != data)
2809                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2810         } else {
2811                 /* restore GFXIP control over GCPG */
2812                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2813                 if(default_data != data)
2814                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2815         }
2816 }
2817
2818 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2819 {
2820         uint32_t data = 0;
2821
2822         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2823                               AMD_PG_SUPPORT_GFX_SMG |
2824                               AMD_PG_SUPPORT_GFX_DMG)) {
2825                 /* init IDLE_POLL_COUNT = 60 */
2826                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2827                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2828                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2829                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2830
2831                 /* init RLC PG Delay */
2832                 data = 0;
2833                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2834                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2835                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2836                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2837                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2838
2839                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2840                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2841                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2842                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2843
2844                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2845                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2846                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2847                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2848
2849                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2850                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2851
2852                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2853                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2854                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2855                 if (adev->asic_type != CHIP_RENOIR)
2856                         pwr_10_0_gfxip_control_over_cgpg(adev, true);
2857         }
2858 }
2859
2860 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2861                                                 bool enable)
2862 {
2863         uint32_t data = 0;
2864         uint32_t default_data = 0;
2865
2866         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2867         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2868                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2869                              enable ? 1 : 0);
2870         if (default_data != data)
2871                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2872 }
2873
2874 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2875                                                 bool enable)
2876 {
2877         uint32_t data = 0;
2878         uint32_t default_data = 0;
2879
2880         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2881         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2882                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2883                              enable ? 1 : 0);
2884         if(default_data != data)
2885                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2886 }
2887
2888 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2889                                         bool enable)
2890 {
2891         uint32_t data = 0;
2892         uint32_t default_data = 0;
2893
2894         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2895         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2896                              CP_PG_DISABLE,
2897                              enable ? 0 : 1);
2898         if(default_data != data)
2899                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2900 }
2901
2902 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2903                                                 bool enable)
2904 {
2905         uint32_t data, default_data;
2906
2907         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2908         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2909                              GFX_POWER_GATING_ENABLE,
2910                              enable ? 1 : 0);
2911         if(default_data != data)
2912                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2913 }
2914
2915 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2916                                                 bool enable)
2917 {
2918         uint32_t data, default_data;
2919
2920         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2921         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2922                              GFX_PIPELINE_PG_ENABLE,
2923                              enable ? 1 : 0);
2924         if(default_data != data)
2925                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2926
2927         if (!enable)
2928                 /* read any GFX register to wake up GFX */
2929                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2930 }
2931
2932 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2933                                                        bool enable)
2934 {
2935         uint32_t data, default_data;
2936
2937         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2938         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2939                              STATIC_PER_CU_PG_ENABLE,
2940                              enable ? 1 : 0);
2941         if(default_data != data)
2942                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2943 }
2944
2945 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2946                                                 bool enable)
2947 {
2948         uint32_t data, default_data;
2949
2950         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2951         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2952                              DYN_PER_CU_PG_ENABLE,
2953                              enable ? 1 : 0);
2954         if(default_data != data)
2955                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2956 }
2957
2958 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2959 {
2960         gfx_v9_0_init_csb(adev);
2961
2962         /*
2963          * Rlc save restore list is workable since v2_1.
2964          * And it's needed by gfxoff feature.
2965          */
2966         if (adev->gfx.rlc.is_rlc_v2_1) {
2967                 if (adev->asic_type == CHIP_VEGA12 ||
2968                     (adev->apu_flags & AMD_APU_IS_RAVEN2))
2969                         gfx_v9_1_init_rlc_save_restore_list(adev);
2970                 gfx_v9_0_enable_save_restore_machine(adev);
2971         }
2972
2973         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2974                               AMD_PG_SUPPORT_GFX_SMG |
2975                               AMD_PG_SUPPORT_GFX_DMG |
2976                               AMD_PG_SUPPORT_CP |
2977                               AMD_PG_SUPPORT_GDS |
2978                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2979                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2980                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2981                 gfx_v9_0_init_gfx_power_gating(adev);
2982         }
2983 }
2984
2985 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2986 {
2987         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2988         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2989         gfx_v9_0_wait_for_rlc_serdes(adev);
2990 }
2991
2992 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2993 {
2994         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2995         udelay(50);
2996         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2997         udelay(50);
2998 }
2999
3000 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3001 {
3002 #ifdef AMDGPU_RLC_DEBUG_RETRY
3003         u32 rlc_ucode_ver;
3004 #endif
3005
3006         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3007         udelay(50);
3008
3009         /* carrizo do enable cp interrupt after cp inited */
3010         if (!(adev->flags & AMD_IS_APU)) {
3011                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3012                 udelay(50);
3013         }
3014
3015 #ifdef AMDGPU_RLC_DEBUG_RETRY
3016         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3017         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3018         if(rlc_ucode_ver == 0x108) {
3019                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
3020                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3021                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3022                  * default is 0x9C4 to create a 100us interval */
3023                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3024                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3025                  * to disable the page fault retry interrupts, default is
3026                  * 0x100 (256) */
3027                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3028         }
3029 #endif
3030 }
3031
3032 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3033 {
3034         const struct rlc_firmware_header_v2_0 *hdr;
3035         const __le32 *fw_data;
3036         unsigned i, fw_size;
3037
3038         if (!adev->gfx.rlc_fw)
3039                 return -EINVAL;
3040
3041         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3042         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3043
3044         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3045                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3046         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3047
3048         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3049                         RLCG_UCODE_LOADING_START_ADDRESS);
3050         for (i = 0; i < fw_size; i++)
3051                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3052         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3053
3054         return 0;
3055 }
3056
3057 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3058 {
3059         int r;
3060
3061         if (amdgpu_sriov_vf(adev)) {
3062                 gfx_v9_0_init_csb(adev);
3063                 return 0;
3064         }
3065
3066         adev->gfx.rlc.funcs->stop(adev);
3067
3068         /* disable CG */
3069         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3070
3071         gfx_v9_0_init_pg(adev);
3072
3073         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3074                 /* legacy rlc firmware loading */
3075                 r = gfx_v9_0_rlc_load_microcode(adev);
3076                 if (r)
3077                         return r;
3078         }
3079
3080         switch (adev->asic_type) {
3081         case CHIP_RAVEN:
3082                 if (amdgpu_lbpw == 0)
3083                         gfx_v9_0_enable_lbpw(adev, false);
3084                 else
3085                         gfx_v9_0_enable_lbpw(adev, true);
3086                 break;
3087         case CHIP_VEGA20:
3088                 if (amdgpu_lbpw > 0)
3089                         gfx_v9_0_enable_lbpw(adev, true);
3090                 else
3091                         gfx_v9_0_enable_lbpw(adev, false);
3092                 break;
3093         default:
3094                 break;
3095         }
3096
3097         adev->gfx.rlc.funcs->start(adev);
3098
3099         return 0;
3100 }
3101
3102 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3103 {
3104         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3105
3106         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3107         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3108         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3109         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3110         udelay(50);
3111 }
3112
3113 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3114 {
3115         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3116         const struct gfx_firmware_header_v1_0 *ce_hdr;
3117         const struct gfx_firmware_header_v1_0 *me_hdr;
3118         const __le32 *fw_data;
3119         unsigned i, fw_size;
3120
3121         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3122                 return -EINVAL;
3123
3124         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3125                 adev->gfx.pfp_fw->data;
3126         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3127                 adev->gfx.ce_fw->data;
3128         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3129                 adev->gfx.me_fw->data;
3130
3131         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3132         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3133         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3134
3135         gfx_v9_0_cp_gfx_enable(adev, false);
3136
3137         /* PFP */
3138         fw_data = (const __le32 *)
3139                 (adev->gfx.pfp_fw->data +
3140                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3141         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3142         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3143         for (i = 0; i < fw_size; i++)
3144                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3145         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3146
3147         /* CE */
3148         fw_data = (const __le32 *)
3149                 (adev->gfx.ce_fw->data +
3150                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3151         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3152         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3153         for (i = 0; i < fw_size; i++)
3154                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3155         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3156
3157         /* ME */
3158         fw_data = (const __le32 *)
3159                 (adev->gfx.me_fw->data +
3160                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3161         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3162         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3163         for (i = 0; i < fw_size; i++)
3164                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3165         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3166
3167         return 0;
3168 }
3169
3170 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3171 {
3172         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3173         const struct cs_section_def *sect = NULL;
3174         const struct cs_extent_def *ext = NULL;
3175         int r, i, tmp;
3176
3177         /* init the CP */
3178         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3179         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3180
3181         gfx_v9_0_cp_gfx_enable(adev, true);
3182
3183         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3184         if (r) {
3185                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3186                 return r;
3187         }
3188
3189         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3190         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3191
3192         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3193         amdgpu_ring_write(ring, 0x80000000);
3194         amdgpu_ring_write(ring, 0x80000000);
3195
3196         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3197                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3198                         if (sect->id == SECT_CONTEXT) {
3199                                 amdgpu_ring_write(ring,
3200                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3201                                                ext->reg_count));
3202                                 amdgpu_ring_write(ring,
3203                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3204                                 for (i = 0; i < ext->reg_count; i++)
3205                                         amdgpu_ring_write(ring, ext->extent[i]);
3206                         }
3207                 }
3208         }
3209
3210         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3211         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3212
3213         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3214         amdgpu_ring_write(ring, 0);
3215
3216         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3217         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3218         amdgpu_ring_write(ring, 0x8000);
3219         amdgpu_ring_write(ring, 0x8000);
3220
3221         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3222         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3223                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3224         amdgpu_ring_write(ring, tmp);
3225         amdgpu_ring_write(ring, 0);
3226
3227         amdgpu_ring_commit(ring);
3228
3229         return 0;
3230 }
3231
3232 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3233 {
3234         struct amdgpu_ring *ring;
3235         u32 tmp;
3236         u32 rb_bufsz;
3237         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3238
3239         /* Set the write pointer delay */
3240         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3241
3242         /* set the RB to use vmid 0 */
3243         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3244
3245         /* Set ring buffer size */
3246         ring = &adev->gfx.gfx_ring[0];
3247         rb_bufsz = order_base_2(ring->ring_size / 8);
3248         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3249         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3250 #ifdef __BIG_ENDIAN
3251         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3252 #endif
3253         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3254
3255         /* Initialize the ring buffer's write pointers */
3256         ring->wptr = 0;
3257         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3258         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3259
3260         /* set the wb address wether it's enabled or not */
3261         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3262         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3263         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3264
3265         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3266         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3267         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3268
3269         mdelay(1);
3270         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3271
3272         rb_addr = ring->gpu_addr >> 8;
3273         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3274         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3275
3276         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3277         if (ring->use_doorbell) {
3278                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3279                                     DOORBELL_OFFSET, ring->doorbell_index);
3280                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3281                                     DOORBELL_EN, 1);
3282         } else {
3283                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3284         }
3285         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3286
3287         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3288                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3289         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3290
3291         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3292                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3293
3294
3295         /* start the ring */
3296         gfx_v9_0_cp_gfx_start(adev);
3297         ring->sched.ready = true;
3298
3299         return 0;
3300 }
3301
3302 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3303 {
3304         if (enable) {
3305                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3306         } else {
3307                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3308                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3309                 adev->gfx.kiq.ring.sched.ready = false;
3310         }
3311         udelay(50);
3312 }
3313
3314 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3315 {
3316         const struct gfx_firmware_header_v1_0 *mec_hdr;
3317         const __le32 *fw_data;
3318         unsigned i;
3319         u32 tmp;
3320
3321         if (!adev->gfx.mec_fw)
3322                 return -EINVAL;
3323
3324         gfx_v9_0_cp_compute_enable(adev, false);
3325
3326         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3327         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3328
3329         fw_data = (const __le32 *)
3330                 (adev->gfx.mec_fw->data +
3331                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3332         tmp = 0;
3333         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3334         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3335         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3336
3337         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3338                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3339         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3340                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3341
3342         /* MEC1 */
3343         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3344                          mec_hdr->jt_offset);
3345         for (i = 0; i < mec_hdr->jt_size; i++)
3346                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3347                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3348
3349         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3350                         adev->gfx.mec_fw_version);
3351         /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3352
3353         return 0;
3354 }
3355
3356 /* KIQ functions */
3357 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3358 {
3359         uint32_t tmp;
3360         struct amdgpu_device *adev = ring->adev;
3361
3362         /* tell RLC which is KIQ queue */
3363         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3364         tmp &= 0xffffff00;
3365         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3366         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3367         tmp |= 0x80;
3368         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3369 }
3370
3371 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3372 {
3373         struct amdgpu_device *adev = ring->adev;
3374
3375         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3376                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
3377                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3378                         mqd->cp_hqd_queue_priority =
3379                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3380                 }
3381         }
3382 }
3383
3384 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3385 {
3386         struct amdgpu_device *adev = ring->adev;
3387         struct v9_mqd *mqd = ring->mqd_ptr;
3388         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3389         uint32_t tmp;
3390
3391         mqd->header = 0xC0310800;
3392         mqd->compute_pipelinestat_enable = 0x00000001;
3393         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3394         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3395         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3396         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3397         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3398         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3399         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3400         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3401         mqd->compute_misc_reserved = 0x00000003;
3402
3403         mqd->dynamic_cu_mask_addr_lo =
3404                 lower_32_bits(ring->mqd_gpu_addr
3405                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3406         mqd->dynamic_cu_mask_addr_hi =
3407                 upper_32_bits(ring->mqd_gpu_addr
3408                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3409
3410         eop_base_addr = ring->eop_gpu_addr >> 8;
3411         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3412         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3413
3414         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3415         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3416         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3417                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3418
3419         mqd->cp_hqd_eop_control = tmp;
3420
3421         /* enable doorbell? */
3422         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3423
3424         if (ring->use_doorbell) {
3425                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3426                                     DOORBELL_OFFSET, ring->doorbell_index);
3427                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3428                                     DOORBELL_EN, 1);
3429                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3430                                     DOORBELL_SOURCE, 0);
3431                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3432                                     DOORBELL_HIT, 0);
3433         } else {
3434                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3435                                          DOORBELL_EN, 0);
3436         }
3437
3438         mqd->cp_hqd_pq_doorbell_control = tmp;
3439
3440         /* disable the queue if it's active */
3441         ring->wptr = 0;
3442         mqd->cp_hqd_dequeue_request = 0;
3443         mqd->cp_hqd_pq_rptr = 0;
3444         mqd->cp_hqd_pq_wptr_lo = 0;
3445         mqd->cp_hqd_pq_wptr_hi = 0;
3446
3447         /* set the pointer to the MQD */
3448         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3449         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3450
3451         /* set MQD vmid to 0 */
3452         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3453         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3454         mqd->cp_mqd_control = tmp;
3455
3456         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3457         hqd_gpu_addr = ring->gpu_addr >> 8;
3458         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3459         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3460
3461         /* set up the HQD, this is similar to CP_RB0_CNTL */
3462         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3463         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3464                             (order_base_2(ring->ring_size / 4) - 1));
3465         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3466                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3467 #ifdef __BIG_ENDIAN
3468         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3469 #endif
3470         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3471         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3472         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3473         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3474         mqd->cp_hqd_pq_control = tmp;
3475
3476         /* set the wb address whether it's enabled or not */
3477         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3478         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3479         mqd->cp_hqd_pq_rptr_report_addr_hi =
3480                 upper_32_bits(wb_gpu_addr) & 0xffff;
3481
3482         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3483         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3484         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3485         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3486
3487         tmp = 0;
3488         /* enable the doorbell if requested */
3489         if (ring->use_doorbell) {
3490                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3491                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3492                                 DOORBELL_OFFSET, ring->doorbell_index);
3493
3494                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3495                                          DOORBELL_EN, 1);
3496                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3497                                          DOORBELL_SOURCE, 0);
3498                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3499                                          DOORBELL_HIT, 0);
3500         }
3501
3502         mqd->cp_hqd_pq_doorbell_control = tmp;
3503
3504         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3505         ring->wptr = 0;
3506         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3507
3508         /* set the vmid for the queue */
3509         mqd->cp_hqd_vmid = 0;
3510
3511         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3512         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3513         mqd->cp_hqd_persistent_state = tmp;
3514
3515         /* set MIN_IB_AVAIL_SIZE */
3516         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3517         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3518         mqd->cp_hqd_ib_control = tmp;
3519
3520         /* set static priority for a queue/ring */
3521         gfx_v9_0_mqd_set_priority(ring, mqd);
3522         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
3523
3524         /* map_queues packet doesn't need activate the queue,
3525          * so only kiq need set this field.
3526          */
3527         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3528                 mqd->cp_hqd_active = 1;
3529
3530         return 0;
3531 }
3532
3533 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3534 {
3535         struct amdgpu_device *adev = ring->adev;
3536         struct v9_mqd *mqd = ring->mqd_ptr;
3537         int j;
3538
3539         /* disable wptr polling */
3540         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3541
3542         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3543                mqd->cp_hqd_eop_base_addr_lo);
3544         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3545                mqd->cp_hqd_eop_base_addr_hi);
3546
3547         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3548         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3549                mqd->cp_hqd_eop_control);
3550
3551         /* enable doorbell? */
3552         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3553                mqd->cp_hqd_pq_doorbell_control);
3554
3555         /* disable the queue if it's active */
3556         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3557                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3558                 for (j = 0; j < adev->usec_timeout; j++) {
3559                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3560                                 break;
3561                         udelay(1);
3562                 }
3563                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3564                        mqd->cp_hqd_dequeue_request);
3565                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3566                        mqd->cp_hqd_pq_rptr);
3567                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3568                        mqd->cp_hqd_pq_wptr_lo);
3569                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3570                        mqd->cp_hqd_pq_wptr_hi);
3571         }
3572
3573         /* set the pointer to the MQD */
3574         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3575                mqd->cp_mqd_base_addr_lo);
3576         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3577                mqd->cp_mqd_base_addr_hi);
3578
3579         /* set MQD vmid to 0 */
3580         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3581                mqd->cp_mqd_control);
3582
3583         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3584         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3585                mqd->cp_hqd_pq_base_lo);
3586         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3587                mqd->cp_hqd_pq_base_hi);
3588
3589         /* set up the HQD, this is similar to CP_RB0_CNTL */
3590         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3591                mqd->cp_hqd_pq_control);
3592
3593         /* set the wb address whether it's enabled or not */
3594         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3595                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3596         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3597                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3598
3599         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3600         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3601                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3602         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3603                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3604
3605         /* enable the doorbell if requested */
3606         if (ring->use_doorbell) {
3607                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3608                                         (adev->doorbell_index.kiq * 2) << 2);
3609                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3610                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3611         }
3612
3613         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3614                mqd->cp_hqd_pq_doorbell_control);
3615
3616         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3617         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3618                mqd->cp_hqd_pq_wptr_lo);
3619         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3620                mqd->cp_hqd_pq_wptr_hi);
3621
3622         /* set the vmid for the queue */
3623         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3624
3625         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3626                mqd->cp_hqd_persistent_state);
3627
3628         /* activate the queue */
3629         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3630                mqd->cp_hqd_active);
3631
3632         if (ring->use_doorbell)
3633                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3634
3635         return 0;
3636 }
3637
3638 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3639 {
3640         struct amdgpu_device *adev = ring->adev;
3641         int j;
3642
3643         /* disable the queue if it's active */
3644         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3645
3646                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3647
3648                 for (j = 0; j < adev->usec_timeout; j++) {
3649                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3650                                 break;
3651                         udelay(1);
3652                 }
3653
3654                 if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3655                         DRM_DEBUG("KIQ dequeue request failed.\n");
3656
3657                         /* Manual disable if dequeue request times out */
3658                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3659                 }
3660
3661                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3662                       0);
3663         }
3664
3665         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3666         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3667         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3668         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3669         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3670         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3671         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3672         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3673
3674         return 0;
3675 }
3676
3677 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3678 {
3679         struct amdgpu_device *adev = ring->adev;
3680         struct v9_mqd *mqd = ring->mqd_ptr;
3681         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3682
3683         gfx_v9_0_kiq_setting(ring);
3684
3685         if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3686                 /* reset MQD to a clean status */
3687                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3688                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3689
3690                 /* reset ring buffer */
3691                 ring->wptr = 0;
3692                 amdgpu_ring_clear_ring(ring);
3693
3694                 mutex_lock(&adev->srbm_mutex);
3695                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3696                 gfx_v9_0_kiq_init_register(ring);
3697                 soc15_grbm_select(adev, 0, 0, 0, 0);
3698                 mutex_unlock(&adev->srbm_mutex);
3699         } else {
3700                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3701                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3702                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3703                 mutex_lock(&adev->srbm_mutex);
3704                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3705                 gfx_v9_0_mqd_init(ring);
3706                 gfx_v9_0_kiq_init_register(ring);
3707                 soc15_grbm_select(adev, 0, 0, 0, 0);
3708                 mutex_unlock(&adev->srbm_mutex);
3709
3710                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3711                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3712         }
3713
3714         return 0;
3715 }
3716
3717 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3718 {
3719         struct amdgpu_device *adev = ring->adev;
3720         struct v9_mqd *mqd = ring->mqd_ptr;
3721         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3722
3723         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3724                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3725                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3726                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3727                 mutex_lock(&adev->srbm_mutex);
3728                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3729                 gfx_v9_0_mqd_init(ring);
3730                 soc15_grbm_select(adev, 0, 0, 0, 0);
3731                 mutex_unlock(&adev->srbm_mutex);
3732
3733                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3734                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3735         } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3736                 /* reset MQD to a clean status */
3737                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3738                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3739
3740                 /* reset ring buffer */
3741                 ring->wptr = 0;
3742                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3743                 amdgpu_ring_clear_ring(ring);
3744         } else {
3745                 amdgpu_ring_clear_ring(ring);
3746         }
3747
3748         return 0;
3749 }
3750
3751 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3752 {
3753         struct amdgpu_ring *ring;
3754         int r;
3755
3756         ring = &adev->gfx.kiq.ring;
3757
3758         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3759         if (unlikely(r != 0))
3760                 return r;
3761
3762         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3763         if (unlikely(r != 0))
3764                 return r;
3765
3766         gfx_v9_0_kiq_init_queue(ring);
3767         amdgpu_bo_kunmap(ring->mqd_obj);
3768         ring->mqd_ptr = NULL;
3769         amdgpu_bo_unreserve(ring->mqd_obj);
3770         ring->sched.ready = true;
3771         return 0;
3772 }
3773
3774 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3775 {
3776         struct amdgpu_ring *ring = NULL;
3777         int r = 0, i;
3778
3779         gfx_v9_0_cp_compute_enable(adev, true);
3780
3781         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3782                 ring = &adev->gfx.compute_ring[i];
3783
3784                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3785                 if (unlikely(r != 0))
3786                         goto done;
3787                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3788                 if (!r) {
3789                         r = gfx_v9_0_kcq_init_queue(ring);
3790                         amdgpu_bo_kunmap(ring->mqd_obj);
3791                         ring->mqd_ptr = NULL;
3792                 }
3793                 amdgpu_bo_unreserve(ring->mqd_obj);
3794                 if (r)
3795                         goto done;
3796         }
3797
3798         r = amdgpu_gfx_enable_kcq(adev);
3799 done:
3800         return r;
3801 }
3802
3803 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3804 {
3805         int r, i;
3806         struct amdgpu_ring *ring;
3807
3808         if (!(adev->flags & AMD_IS_APU))
3809                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3810
3811         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3812                 if (adev->asic_type != CHIP_ARCTURUS) {
3813                         /* legacy firmware loading */
3814                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3815                         if (r)
3816                                 return r;
3817                 }
3818
3819                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3820                 if (r)
3821                         return r;
3822         }
3823
3824         r = gfx_v9_0_kiq_resume(adev);
3825         if (r)
3826                 return r;
3827
3828         if (adev->asic_type != CHIP_ARCTURUS) {
3829                 r = gfx_v9_0_cp_gfx_resume(adev);
3830                 if (r)
3831                         return r;
3832         }
3833
3834         r = gfx_v9_0_kcq_resume(adev);
3835         if (r)
3836                 return r;
3837
3838         if (adev->asic_type != CHIP_ARCTURUS) {
3839                 ring = &adev->gfx.gfx_ring[0];
3840                 r = amdgpu_ring_test_helper(ring);
3841                 if (r)
3842                         return r;
3843         }
3844
3845         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3846                 ring = &adev->gfx.compute_ring[i];
3847                 amdgpu_ring_test_helper(ring);
3848         }
3849
3850         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3851
3852         return 0;
3853 }
3854
3855 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3856 {
3857         u32 tmp;
3858
3859         if (adev->asic_type != CHIP_ARCTURUS)
3860                 return;
3861
3862         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3863         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3864                                 adev->df.hash_status.hash_64k);
3865         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3866                                 adev->df.hash_status.hash_2m);
3867         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3868                                 adev->df.hash_status.hash_1g);
3869         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3870 }
3871
3872 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3873 {
3874         if (adev->asic_type != CHIP_ARCTURUS)
3875                 gfx_v9_0_cp_gfx_enable(adev, enable);
3876         gfx_v9_0_cp_compute_enable(adev, enable);
3877 }
3878
3879 static int gfx_v9_0_hw_init(void *handle)
3880 {
3881         int r;
3882         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3883
3884         if (!amdgpu_sriov_vf(adev))
3885                 gfx_v9_0_init_golden_registers(adev);
3886
3887         gfx_v9_0_constants_init(adev);
3888
3889         gfx_v9_0_init_tcp_config(adev);
3890
3891         r = adev->gfx.rlc.funcs->resume(adev);
3892         if (r)
3893                 return r;
3894
3895         r = gfx_v9_0_cp_resume(adev);
3896         if (r)
3897                 return r;
3898
3899         return r;
3900 }
3901
3902 static int gfx_v9_0_hw_fini(void *handle)
3903 {
3904         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3905
3906         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3907         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3908         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3909
3910         /* DF freeze and kcq disable will fail */
3911         if (!amdgpu_ras_intr_triggered())
3912                 /* disable KCQ to avoid CPC touch memory not valid anymore */
3913                 amdgpu_gfx_disable_kcq(adev);
3914
3915         if (amdgpu_sriov_vf(adev)) {
3916                 gfx_v9_0_cp_gfx_enable(adev, false);
3917                 /* must disable polling for SRIOV when hw finished, otherwise
3918                  * CPC engine may still keep fetching WB address which is already
3919                  * invalid after sw finished and trigger DMAR reading error in
3920                  * hypervisor side.
3921                  */
3922                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3923                 return 0;
3924         }
3925
3926         /* Use deinitialize sequence from CAIL when unbinding device from driver,
3927          * otherwise KIQ is hanging when binding back
3928          */
3929         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3930                 mutex_lock(&adev->srbm_mutex);
3931                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3932                                 adev->gfx.kiq.ring.pipe,
3933                                 adev->gfx.kiq.ring.queue, 0);
3934                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3935                 soc15_grbm_select(adev, 0, 0, 0, 0);
3936                 mutex_unlock(&adev->srbm_mutex);
3937         }
3938
3939         gfx_v9_0_cp_enable(adev, false);
3940         adev->gfx.rlc.funcs->stop(adev);
3941
3942         return 0;
3943 }
3944
3945 static int gfx_v9_0_suspend(void *handle)
3946 {
3947         return gfx_v9_0_hw_fini(handle);
3948 }
3949
3950 static int gfx_v9_0_resume(void *handle)
3951 {
3952         return gfx_v9_0_hw_init(handle);
3953 }
3954
3955 static bool gfx_v9_0_is_idle(void *handle)
3956 {
3957         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3958
3959         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3960                                 GRBM_STATUS, GUI_ACTIVE))
3961                 return false;
3962         else
3963                 return true;
3964 }
3965
3966 static int gfx_v9_0_wait_for_idle(void *handle)
3967 {
3968         unsigned i;
3969         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3970
3971         for (i = 0; i < adev->usec_timeout; i++) {
3972                 if (gfx_v9_0_is_idle(handle))
3973                         return 0;
3974                 udelay(1);
3975         }
3976         return -ETIMEDOUT;
3977 }
3978
3979 static int gfx_v9_0_soft_reset(void *handle)
3980 {
3981         u32 grbm_soft_reset = 0;
3982         u32 tmp;
3983         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3984
3985         /* GRBM_STATUS */
3986         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3987         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3988                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3989                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3990                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3991                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3992                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3993                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3994                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3995                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3996                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3997         }
3998
3999         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4000                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4001                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4002         }
4003
4004         /* GRBM_STATUS2 */
4005         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4006         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4007                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4008                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4009
4010
4011         if (grbm_soft_reset) {
4012                 /* stop the rlc */
4013                 adev->gfx.rlc.funcs->stop(adev);
4014
4015                 if (adev->asic_type != CHIP_ARCTURUS)
4016                         /* Disable GFX parsing/prefetching */
4017                         gfx_v9_0_cp_gfx_enable(adev, false);
4018
4019                 /* Disable MEC parsing/prefetching */
4020                 gfx_v9_0_cp_compute_enable(adev, false);
4021
4022                 if (grbm_soft_reset) {
4023                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4024                         tmp |= grbm_soft_reset;
4025                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4026                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4027                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4028
4029                         udelay(50);
4030
4031                         tmp &= ~grbm_soft_reset;
4032                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4033                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4034                 }
4035
4036                 /* Wait a little for things to settle down */
4037                 udelay(50);
4038         }
4039         return 0;
4040 }
4041
4042 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4043 {
4044         signed long r, cnt = 0;
4045         unsigned long flags;
4046         uint32_t seq, reg_val_offs = 0;
4047         uint64_t value = 0;
4048         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4049         struct amdgpu_ring *ring = &kiq->ring;
4050
4051         BUG_ON(!ring->funcs->emit_rreg);
4052
4053         spin_lock_irqsave(&kiq->ring_lock, flags);
4054         if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4055                 pr_err("critical bug! too many kiq readers\n");
4056                 goto failed_unlock;
4057         }
4058         amdgpu_ring_alloc(ring, 32);
4059         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4060         amdgpu_ring_write(ring, 9 |     /* src: register*/
4061                                 (5 << 8) |      /* dst: memory */
4062                                 (1 << 16) |     /* count sel */
4063                                 (1 << 20));     /* write confirm */
4064         amdgpu_ring_write(ring, 0);
4065         amdgpu_ring_write(ring, 0);
4066         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4067                                 reg_val_offs * 4));
4068         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4069                                 reg_val_offs * 4));
4070         r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4071         if (r)
4072                 goto failed_undo;
4073
4074         amdgpu_ring_commit(ring);
4075         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4076
4077         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4078
4079         /* don't wait anymore for gpu reset case because this way may
4080          * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
4081          * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4082          * never return if we keep waiting in virt_kiq_rreg, which cause
4083          * gpu_recover() hang there.
4084          *
4085          * also don't wait anymore for IRQ context
4086          * */
4087         if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
4088                 goto failed_kiq_read;
4089
4090         might_sleep();
4091         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4092                 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4093                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4094         }
4095
4096         if (cnt > MAX_KIQ_REG_TRY)
4097                 goto failed_kiq_read;
4098
4099         mb();
4100         value = (uint64_t)adev->wb.wb[reg_val_offs] |
4101                 (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
4102         amdgpu_device_wb_free(adev, reg_val_offs);
4103         return value;
4104
4105 failed_undo:
4106         amdgpu_ring_undo(ring);
4107 failed_unlock:
4108         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4109 failed_kiq_read:
4110         if (reg_val_offs)
4111                 amdgpu_device_wb_free(adev, reg_val_offs);
4112         pr_err("failed to read gpu clock\n");
4113         return ~0;
4114 }
4115
4116 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4117 {
4118         uint64_t clock;
4119
4120         amdgpu_gfx_off_ctrl(adev, false);
4121         mutex_lock(&adev->gfx.gpu_clock_mutex);
4122         if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
4123                 clock = gfx_v9_0_kiq_read_clock(adev);
4124         } else {
4125                 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4126                 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4127                         ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4128         }
4129         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4130         amdgpu_gfx_off_ctrl(adev, true);
4131         return clock;
4132 }
4133
4134 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4135                                           uint32_t vmid,
4136                                           uint32_t gds_base, uint32_t gds_size,
4137                                           uint32_t gws_base, uint32_t gws_size,
4138                                           uint32_t oa_base, uint32_t oa_size)
4139 {
4140         struct amdgpu_device *adev = ring->adev;
4141
4142         /* GDS Base */
4143         gfx_v9_0_write_data_to_reg(ring, 0, false,
4144                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4145                                    gds_base);
4146
4147         /* GDS Size */
4148         gfx_v9_0_write_data_to_reg(ring, 0, false,
4149                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4150                                    gds_size);
4151
4152         /* GWS */
4153         gfx_v9_0_write_data_to_reg(ring, 0, false,
4154                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4155                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4156
4157         /* OA */
4158         gfx_v9_0_write_data_to_reg(ring, 0, false,
4159                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4160                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4161 }
4162
4163 static const u32 vgpr_init_compute_shader[] =
4164 {
4165         0xb07c0000, 0xbe8000ff,
4166         0x000000f8, 0xbf110800,
4167         0x7e000280, 0x7e020280,
4168         0x7e040280, 0x7e060280,
4169         0x7e080280, 0x7e0a0280,
4170         0x7e0c0280, 0x7e0e0280,
4171         0x80808800, 0xbe803200,
4172         0xbf84fff5, 0xbf9c0000,
4173         0xd28c0001, 0x0001007f,
4174         0xd28d0001, 0x0002027e,
4175         0x10020288, 0xb8810904,
4176         0xb7814000, 0xd1196a01,
4177         0x00000301, 0xbe800087,
4178         0xbefc00c1, 0xd89c4000,
4179         0x00020201, 0xd89cc080,
4180         0x00040401, 0x320202ff,
4181         0x00000800, 0x80808100,
4182         0xbf84fff8, 0x7e020280,
4183         0xbf810000, 0x00000000,
4184 };
4185
4186 static const u32 sgpr_init_compute_shader[] =
4187 {
4188         0xb07c0000, 0xbe8000ff,
4189         0x0000005f, 0xbee50080,
4190         0xbe812c65, 0xbe822c65,
4191         0xbe832c65, 0xbe842c65,
4192         0xbe852c65, 0xb77c0005,
4193         0x80808500, 0xbf84fff8,
4194         0xbe800080, 0xbf810000,
4195 };
4196
4197 static const u32 vgpr_init_compute_shader_arcturus[] = {
4198         0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4199         0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4200         0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4201         0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4202         0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4203         0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4204         0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4205         0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4206         0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4207         0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4208         0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4209         0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4210         0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4211         0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4212         0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4213         0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4214         0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4215         0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4216         0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4217         0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4218         0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4219         0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4220         0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4221         0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4222         0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4223         0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4224         0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4225         0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4226         0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4227         0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4228         0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4229         0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4230         0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4231         0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4232         0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4233         0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4234         0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4235         0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4236         0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4237         0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4238         0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4239         0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4240         0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4241         0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4242         0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4243         0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4244         0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4245         0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4246         0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4247         0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4248         0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4249         0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4250         0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4251         0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4252         0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4253         0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4254         0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4255         0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4256         0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4257         0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4258         0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4259         0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4260         0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4261         0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4262         0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4263         0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4264         0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4265         0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4266         0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4267         0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4268         0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4269         0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4270         0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4271         0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4272         0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4273         0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4274         0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4275         0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4276         0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4277         0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4278         0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4279         0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4280         0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4281         0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4282         0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4283         0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4284         0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4285         0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4286         0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4287         0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4288         0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4289         0xbf84fff8, 0xbf810000,
4290 };
4291
4292 /* When below register arrays changed, please update gpr_reg_size,
4293   and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds,
4294   to cover all gfx9 ASICs */
4295 static const struct soc15_reg_entry vgpr_init_regs[] = {
4296    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4297    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4298    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4299    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4300    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4301    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4302    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4303    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4304    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4305    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4306    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4307    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4308    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4309    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4310 };
4311
4312 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4313    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4314    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4315    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4316    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4317    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4318    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4319    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4320    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4321    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4322    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4323    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4324    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4325    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4326    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4327 };
4328
4329 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4330    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4331    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4332    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4333    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4334    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4335    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4336    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4337    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4338    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4339    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4340    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4341    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4342    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4343    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4344 };
4345
4346 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4347    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4348    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4349    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4350    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4351    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4352    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4353    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4354    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4355    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4356    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4357    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4358    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4359    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4360    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4361 };
4362
4363 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4364    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4365    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4366    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4367    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4368    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4369    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4370    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4371    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4372    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4373    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4374    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4375    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4376    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4377    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4378    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4379    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4380    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4381    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4382    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4383    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4384    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4385    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4386    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4387    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4388    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4389    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4390    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4391    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4392    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4393    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4394    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4395    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4396    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4397 };
4398
4399 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4400 {
4401         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4402         int i, r;
4403
4404         /* only support when RAS is enabled */
4405         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4406                 return 0;
4407
4408         r = amdgpu_ring_alloc(ring, 7);
4409         if (r) {
4410                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4411                         ring->name, r);
4412                 return r;
4413         }
4414
4415         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4416         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4417
4418         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4419         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4420                                 PACKET3_DMA_DATA_DST_SEL(1) |
4421                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4422                                 PACKET3_DMA_DATA_ENGINE(0)));
4423         amdgpu_ring_write(ring, 0);
4424         amdgpu_ring_write(ring, 0);
4425         amdgpu_ring_write(ring, 0);
4426         amdgpu_ring_write(ring, 0);
4427         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4428                                 adev->gds.gds_size);
4429
4430         amdgpu_ring_commit(ring);
4431
4432         for (i = 0; i < adev->usec_timeout; i++) {
4433                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4434                         break;
4435                 udelay(1);
4436         }
4437
4438         if (i >= adev->usec_timeout)
4439                 r = -ETIMEDOUT;
4440
4441         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4442
4443         return r;
4444 }
4445
4446 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4447 {
4448         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4449         struct amdgpu_ib ib;
4450         struct dma_fence *f = NULL;
4451         int r, i;
4452         unsigned total_size, vgpr_offset, sgpr_offset;
4453         u64 gpu_addr;
4454
4455         int compute_dim_x = adev->gfx.config.max_shader_engines *
4456                                                 adev->gfx.config.max_cu_per_sh *
4457                                                 adev->gfx.config.max_sh_per_se;
4458         int sgpr_work_group_size = 5;
4459         int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4460         int vgpr_init_shader_size;
4461         const u32 *vgpr_init_shader_ptr;
4462         const struct soc15_reg_entry *vgpr_init_regs_ptr;
4463
4464         /* only support when RAS is enabled */
4465         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4466                 return 0;
4467
4468         /* bail if the compute ring is not ready */
4469         if (!ring->sched.ready)
4470                 return 0;
4471
4472         if (adev->asic_type == CHIP_ARCTURUS) {
4473                 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4474                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4475                 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4476         } else {
4477                 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4478                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4479                 vgpr_init_regs_ptr = vgpr_init_regs;
4480         }
4481
4482         total_size =
4483                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4484         total_size +=
4485                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4486         total_size +=
4487                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4488         total_size = ALIGN(total_size, 256);
4489         vgpr_offset = total_size;
4490         total_size += ALIGN(vgpr_init_shader_size, 256);
4491         sgpr_offset = total_size;
4492         total_size += sizeof(sgpr_init_compute_shader);
4493
4494         /* allocate an indirect buffer to put the commands in */
4495         memset(&ib, 0, sizeof(ib));
4496         r = amdgpu_ib_get(adev, NULL, total_size,
4497                                         AMDGPU_IB_POOL_DIRECT, &ib);
4498         if (r) {
4499                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4500                 return r;
4501         }
4502
4503         /* load the compute shaders */
4504         for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4505                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4506
4507         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4508                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4509
4510         /* init the ib length to 0 */
4511         ib.length_dw = 0;
4512
4513         /* VGPR */
4514         /* write the register state for the compute dispatch */
4515         for (i = 0; i < gpr_reg_size; i++) {
4516                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4517                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4518                                                                 - PACKET3_SET_SH_REG_START;
4519                 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4520         }
4521         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4522         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4523         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4524         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4525                                                         - PACKET3_SET_SH_REG_START;
4526         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4527         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4528
4529         /* write dispatch packet */
4530         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4531         ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4532         ib.ptr[ib.length_dw++] = 1; /* y */
4533         ib.ptr[ib.length_dw++] = 1; /* z */
4534         ib.ptr[ib.length_dw++] =
4535                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4536
4537         /* write CS partial flush packet */
4538         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4539         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4540
4541         /* SGPR1 */
4542         /* write the register state for the compute dispatch */
4543         for (i = 0; i < gpr_reg_size; i++) {
4544                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4545                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4546                                                                 - PACKET3_SET_SH_REG_START;
4547                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4548         }
4549         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4550         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4551         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4552         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4553                                                         - PACKET3_SET_SH_REG_START;
4554         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4555         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4556
4557         /* write dispatch packet */
4558         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4559         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4560         ib.ptr[ib.length_dw++] = 1; /* y */
4561         ib.ptr[ib.length_dw++] = 1; /* z */
4562         ib.ptr[ib.length_dw++] =
4563                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4564
4565         /* write CS partial flush packet */
4566         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4567         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4568
4569         /* SGPR2 */
4570         /* write the register state for the compute dispatch */
4571         for (i = 0; i < gpr_reg_size; i++) {
4572                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4573                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4574                                                                 - PACKET3_SET_SH_REG_START;
4575                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4576         }
4577         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4578         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4579         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4580         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4581                                                         - PACKET3_SET_SH_REG_START;
4582         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4583         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4584
4585         /* write dispatch packet */
4586         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4587         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4588         ib.ptr[ib.length_dw++] = 1; /* y */
4589         ib.ptr[ib.length_dw++] = 1; /* z */
4590         ib.ptr[ib.length_dw++] =
4591                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4592
4593         /* write CS partial flush packet */
4594         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4595         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4596
4597         /* shedule the ib on the ring */
4598         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4599         if (r) {
4600                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4601                 goto fail;
4602         }
4603
4604         /* wait for the GPU to finish processing the IB */
4605         r = dma_fence_wait(f, false);
4606         if (r) {
4607                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4608                 goto fail;
4609         }
4610
4611 fail:
4612         amdgpu_ib_free(adev, &ib, NULL);
4613         dma_fence_put(f);
4614
4615         return r;
4616 }
4617
4618 static int gfx_v9_0_early_init(void *handle)
4619 {
4620         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4621
4622         if (adev->asic_type == CHIP_ARCTURUS)
4623                 adev->gfx.num_gfx_rings = 0;
4624         else
4625                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4626         adev->gfx.num_compute_rings = amdgpu_num_kcq;
4627         gfx_v9_0_set_kiq_pm4_funcs(adev);
4628         gfx_v9_0_set_ring_funcs(adev);
4629         gfx_v9_0_set_irq_funcs(adev);
4630         gfx_v9_0_set_gds_init(adev);
4631         gfx_v9_0_set_rlc_funcs(adev);
4632
4633         return 0;
4634 }
4635
4636 static int gfx_v9_0_ecc_late_init(void *handle)
4637 {
4638         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4639         int r;
4640
4641         /*
4642          * Temp workaround to fix the issue that CP firmware fails to
4643          * update read pointer when CPDMA is writing clearing operation
4644          * to GDS in suspend/resume sequence on several cards. So just
4645          * limit this operation in cold boot sequence.
4646          */
4647         if (!adev->in_suspend) {
4648                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4649                 if (r)
4650                         return r;
4651         }
4652
4653         /* requires IBs so do in late init after IB pool is initialized */
4654         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4655         if (r)
4656                 return r;
4657
4658         if (adev->gfx.funcs &&
4659             adev->gfx.funcs->reset_ras_error_count)
4660                 adev->gfx.funcs->reset_ras_error_count(adev);
4661
4662         r = amdgpu_gfx_ras_late_init(adev);
4663         if (r)
4664                 return r;
4665
4666         return 0;
4667 }
4668
4669 static int gfx_v9_0_late_init(void *handle)
4670 {
4671         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4672         int r;
4673
4674         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4675         if (r)
4676                 return r;
4677
4678         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4679         if (r)
4680                 return r;
4681
4682         r = gfx_v9_0_ecc_late_init(handle);
4683         if (r)
4684                 return r;
4685
4686         return 0;
4687 }
4688
4689 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4690 {
4691         uint32_t rlc_setting;
4692
4693         /* if RLC is not enabled, do nothing */
4694         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4695         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4696                 return false;
4697
4698         return true;
4699 }
4700
4701 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4702 {
4703         uint32_t data;
4704         unsigned i;
4705
4706         data = RLC_SAFE_MODE__CMD_MASK;
4707         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4708         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4709
4710         /* wait for RLC_SAFE_MODE */
4711         for (i = 0; i < adev->usec_timeout; i++) {
4712                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4713                         break;
4714                 udelay(1);
4715         }
4716 }
4717
4718 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4719 {
4720         uint32_t data;
4721
4722         data = RLC_SAFE_MODE__CMD_MASK;
4723         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4724 }
4725
4726 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4727                                                 bool enable)
4728 {
4729         amdgpu_gfx_rlc_enter_safe_mode(adev);
4730
4731         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4732                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4733                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4734                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4735         } else {
4736                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4737                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4738                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4739         }
4740
4741         amdgpu_gfx_rlc_exit_safe_mode(adev);
4742 }
4743
4744 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4745                                                 bool enable)
4746 {
4747         /* TODO: double check if we need to perform under safe mode */
4748         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4749
4750         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4751                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4752         else
4753                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4754
4755         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4756                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4757         else
4758                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4759
4760         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4761 }
4762
4763 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4764                                                       bool enable)
4765 {
4766         uint32_t data, def;
4767
4768         amdgpu_gfx_rlc_enter_safe_mode(adev);
4769
4770         /* It is disabled by HW by default */
4771         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4772                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4773                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4774
4775                 if (adev->asic_type != CHIP_VEGA12)
4776                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4777
4778                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4779                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4780                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4781
4782                 /* only for Vega10 & Raven1 */
4783                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4784
4785                 if (def != data)
4786                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4787
4788                 /* MGLS is a global flag to control all MGLS in GFX */
4789                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4790                         /* 2 - RLC memory Light sleep */
4791                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4792                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4793                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4794                                 if (def != data)
4795                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4796                         }
4797                         /* 3 - CP memory Light sleep */
4798                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4799                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4800                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4801                                 if (def != data)
4802                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4803                         }
4804                 }
4805         } else {
4806                 /* 1 - MGCG_OVERRIDE */
4807                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4808
4809                 if (adev->asic_type != CHIP_VEGA12)
4810                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4811
4812                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4813                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4814                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4815                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4816
4817                 if (def != data)
4818                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4819
4820                 /* 2 - disable MGLS in RLC */
4821                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4822                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4823                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4824                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4825                 }
4826
4827                 /* 3 - disable MGLS in CP */
4828                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4829                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4830                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4831                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4832                 }
4833         }
4834
4835         amdgpu_gfx_rlc_exit_safe_mode(adev);
4836 }
4837
4838 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4839                                            bool enable)
4840 {
4841         uint32_t data, def;
4842
4843         if (adev->asic_type == CHIP_ARCTURUS)
4844                 return;
4845
4846         amdgpu_gfx_rlc_enter_safe_mode(adev);
4847
4848         /* Enable 3D CGCG/CGLS */
4849         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4850                 /* write cmd to clear cgcg/cgls ov */
4851                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4852                 /* unset CGCG override */
4853                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4854                 /* update CGCG and CGLS override bits */
4855                 if (def != data)
4856                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4857
4858                 /* enable 3Dcgcg FSM(0x0000363f) */
4859                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4860
4861                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4862                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4863                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4864                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4865                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4866                 if (def != data)
4867                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4868
4869                 /* set IDLE_POLL_COUNT(0x00900100) */
4870                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4871                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4872                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4873                 if (def != data)
4874                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4875         } else {
4876                 /* Disable CGCG/CGLS */
4877                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4878                 /* disable cgcg, cgls should be disabled */
4879                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4880                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4881                 /* disable cgcg and cgls in FSM */
4882                 if (def != data)
4883                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4884         }
4885
4886         amdgpu_gfx_rlc_exit_safe_mode(adev);
4887 }
4888
4889 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4890                                                       bool enable)
4891 {
4892         uint32_t def, data;
4893
4894         amdgpu_gfx_rlc_enter_safe_mode(adev);
4895
4896         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4897                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4898                 /* unset CGCG override */
4899                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4900                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4901                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4902                 else
4903                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4904                 /* update CGCG and CGLS override bits */
4905                 if (def != data)
4906                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4907
4908                 /* enable cgcg FSM(0x0000363F) */
4909                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4910
4911                 if (adev->asic_type == CHIP_ARCTURUS)
4912                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4913                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4914                 else
4915                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4916                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4917                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4918                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4919                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4920                 if (def != data)
4921                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4922
4923                 /* set IDLE_POLL_COUNT(0x00900100) */
4924                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4925                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4926                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4927                 if (def != data)
4928                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4929         } else {
4930                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4931                 /* reset CGCG/CGLS bits */
4932                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4933                 /* disable cgcg and cgls in FSM */
4934                 if (def != data)
4935                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4936         }
4937
4938         amdgpu_gfx_rlc_exit_safe_mode(adev);
4939 }
4940
4941 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4942                                             bool enable)
4943 {
4944         if (enable) {
4945                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4946                  * ===  MGCG + MGLS ===
4947                  */
4948                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4949                 /* ===  CGCG /CGLS for GFX 3D Only === */
4950                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4951                 /* ===  CGCG + CGLS === */
4952                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4953         } else {
4954                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4955                  * ===  CGCG + CGLS ===
4956                  */
4957                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4958                 /* ===  CGCG /CGLS for GFX 3D Only === */
4959                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4960                 /* ===  MGCG + MGLS === */
4961                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4962         }
4963         return 0;
4964 }
4965
4966 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
4967 {
4968         u32 reg, data;
4969
4970         reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
4971         if (amdgpu_sriov_is_pp_one_vf(adev))
4972                 data = RREG32_NO_KIQ(reg);
4973         else
4974                 data = RREG32(reg);
4975
4976         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4977         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4978
4979         if (amdgpu_sriov_is_pp_one_vf(adev))
4980                 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
4981         else
4982                 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
4983 }
4984
4985 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
4986                                         uint32_t offset,
4987                                         struct soc15_reg_rlcg *entries, int arr_size)
4988 {
4989         int i;
4990         uint32_t reg;
4991
4992         if (!entries)
4993                 return false;
4994
4995         for (i = 0; i < arr_size; i++) {
4996                 const struct soc15_reg_rlcg *entry;
4997
4998                 entry = &entries[i];
4999                 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5000                 if (offset == reg)
5001                         return true;
5002         }
5003
5004         return false;
5005 }
5006
5007 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5008 {
5009         return gfx_v9_0_check_rlcg_range(adev, offset,
5010                                         (void *)rlcg_access_gc_9_0,
5011                                         ARRAY_SIZE(rlcg_access_gc_9_0));
5012 }
5013
5014 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5015         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5016         .set_safe_mode = gfx_v9_0_set_safe_mode,
5017         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5018         .init = gfx_v9_0_rlc_init,
5019         .get_csb_size = gfx_v9_0_get_csb_size,
5020         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5021         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5022         .resume = gfx_v9_0_rlc_resume,
5023         .stop = gfx_v9_0_rlc_stop,
5024         .reset = gfx_v9_0_rlc_reset,
5025         .start = gfx_v9_0_rlc_start,
5026         .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5027         .rlcg_wreg = gfx_v9_0_rlcg_wreg,
5028         .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5029 };
5030
5031 static int gfx_v9_0_set_powergating_state(void *handle,
5032                                           enum amd_powergating_state state)
5033 {
5034         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5035         bool enable = (state == AMD_PG_STATE_GATE);
5036
5037         switch (adev->asic_type) {
5038         case CHIP_RAVEN:
5039         case CHIP_RENOIR:
5040                 if (!enable)
5041                         amdgpu_gfx_off_ctrl(adev, false);
5042
5043                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5044                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5045                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5046                 } else {
5047                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5048                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5049                 }
5050
5051                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5052                         gfx_v9_0_enable_cp_power_gating(adev, true);
5053                 else
5054                         gfx_v9_0_enable_cp_power_gating(adev, false);
5055
5056                 /* update gfx cgpg state */
5057                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5058
5059                 /* update mgcg state */
5060                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5061
5062                 if (enable)
5063                         amdgpu_gfx_off_ctrl(adev, true);
5064                 break;
5065         case CHIP_VEGA12:
5066                 amdgpu_gfx_off_ctrl(adev, enable);
5067                 break;
5068         default:
5069                 break;
5070         }
5071
5072         return 0;
5073 }
5074
5075 static int gfx_v9_0_set_clockgating_state(void *handle,
5076                                           enum amd_clockgating_state state)
5077 {
5078         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5079
5080         if (amdgpu_sriov_vf(adev))
5081                 return 0;
5082
5083         switch (adev->asic_type) {
5084         case CHIP_VEGA10:
5085         case CHIP_VEGA12:
5086         case CHIP_VEGA20:
5087         case CHIP_RAVEN:
5088         case CHIP_ARCTURUS:
5089         case CHIP_RENOIR:
5090                 gfx_v9_0_update_gfx_clock_gating(adev,
5091                                                  state == AMD_CG_STATE_GATE);
5092                 break;
5093         default:
5094                 break;
5095         }
5096         return 0;
5097 }
5098
5099 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5100 {
5101         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5102         int data;
5103
5104         if (amdgpu_sriov_vf(adev))
5105                 *flags = 0;
5106
5107         /* AMD_CG_SUPPORT_GFX_MGCG */
5108         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5109         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5110                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5111
5112         /* AMD_CG_SUPPORT_GFX_CGCG */
5113         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5114         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5115                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5116
5117         /* AMD_CG_SUPPORT_GFX_CGLS */
5118         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5119                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5120
5121         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5122         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5123         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5124                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5125
5126         /* AMD_CG_SUPPORT_GFX_CP_LS */
5127         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5128         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5129                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5130
5131         if (adev->asic_type != CHIP_ARCTURUS) {
5132                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5133                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5134                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5135                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5136
5137                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5138                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5139                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5140         }
5141 }
5142
5143 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5144 {
5145         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
5146 }
5147
5148 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5149 {
5150         struct amdgpu_device *adev = ring->adev;
5151         u64 wptr;
5152
5153         /* XXX check if swapping is necessary on BE */
5154         if (ring->use_doorbell) {
5155                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5156         } else {
5157                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5158                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5159         }
5160
5161         return wptr;
5162 }
5163
5164 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5165 {
5166         struct amdgpu_device *adev = ring->adev;
5167
5168         if (ring->use_doorbell) {
5169                 /* XXX check if swapping is necessary on BE */
5170                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5171                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5172         } else {
5173                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5174                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5175         }
5176 }
5177
5178 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5179 {
5180         struct amdgpu_device *adev = ring->adev;
5181         u32 ref_and_mask, reg_mem_engine;
5182         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5183
5184         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5185                 switch (ring->me) {
5186                 case 1:
5187                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5188                         break;
5189                 case 2:
5190                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5191                         break;
5192                 default:
5193                         return;
5194                 }
5195                 reg_mem_engine = 0;
5196         } else {
5197                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5198                 reg_mem_engine = 1; /* pfp */
5199         }
5200
5201         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5202                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5203                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5204                               ref_and_mask, ref_and_mask, 0x20);
5205 }
5206
5207 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5208                                         struct amdgpu_job *job,
5209                                         struct amdgpu_ib *ib,
5210                                         uint32_t flags)
5211 {
5212         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5213         u32 header, control = 0;
5214
5215         if (ib->flags & AMDGPU_IB_FLAG_CE)
5216                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5217         else
5218                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5219
5220         control |= ib->length_dw | (vmid << 24);
5221
5222         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5223                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5224
5225                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5226                         gfx_v9_0_ring_emit_de_meta(ring);
5227         }
5228
5229         amdgpu_ring_write(ring, header);
5230         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5231         amdgpu_ring_write(ring,
5232 #ifdef __BIG_ENDIAN
5233                 (2 << 0) |
5234 #endif
5235                 lower_32_bits(ib->gpu_addr));
5236         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5237         amdgpu_ring_write(ring, control);
5238 }
5239
5240 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5241                                           struct amdgpu_job *job,
5242                                           struct amdgpu_ib *ib,
5243                                           uint32_t flags)
5244 {
5245         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5246         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5247
5248         /* Currently, there is a high possibility to get wave ID mismatch
5249          * between ME and GDS, leading to a hw deadlock, because ME generates
5250          * different wave IDs than the GDS expects. This situation happens
5251          * randomly when at least 5 compute pipes use GDS ordered append.
5252          * The wave IDs generated by ME are also wrong after suspend/resume.
5253          * Those are probably bugs somewhere else in the kernel driver.
5254          *
5255          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5256          * GDS to 0 for this ring (me/pipe).
5257          */
5258         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5259                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5260                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5261                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5262         }
5263
5264         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5265         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5266         amdgpu_ring_write(ring,
5267 #ifdef __BIG_ENDIAN
5268                                 (2 << 0) |
5269 #endif
5270                                 lower_32_bits(ib->gpu_addr));
5271         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5272         amdgpu_ring_write(ring, control);
5273 }
5274
5275 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5276                                      u64 seq, unsigned flags)
5277 {
5278         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5279         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5280         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5281
5282         /* RELEASE_MEM - flush caches, send int */
5283         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5284         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5285                                                EOP_TC_NC_ACTION_EN) :
5286                                               (EOP_TCL1_ACTION_EN |
5287                                                EOP_TC_ACTION_EN |
5288                                                EOP_TC_WB_ACTION_EN |
5289                                                EOP_TC_MD_ACTION_EN)) |
5290                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5291                                  EVENT_INDEX(5)));
5292         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5293
5294         /*
5295          * the address should be Qword aligned if 64bit write, Dword
5296          * aligned if only send 32bit data low (discard data high)
5297          */
5298         if (write64bit)
5299                 BUG_ON(addr & 0x7);
5300         else
5301                 BUG_ON(addr & 0x3);
5302         amdgpu_ring_write(ring, lower_32_bits(addr));
5303         amdgpu_ring_write(ring, upper_32_bits(addr));
5304         amdgpu_ring_write(ring, lower_32_bits(seq));
5305         amdgpu_ring_write(ring, upper_32_bits(seq));
5306         amdgpu_ring_write(ring, 0);
5307 }
5308
5309 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5310 {
5311         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5312         uint32_t seq = ring->fence_drv.sync_seq;
5313         uint64_t addr = ring->fence_drv.gpu_addr;
5314
5315         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5316                               lower_32_bits(addr), upper_32_bits(addr),
5317                               seq, 0xffffffff, 4);
5318 }
5319
5320 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5321                                         unsigned vmid, uint64_t pd_addr)
5322 {
5323         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5324
5325         /* compute doesn't have PFP */
5326         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5327                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5328                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5329                 amdgpu_ring_write(ring, 0x0);
5330         }
5331 }
5332
5333 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5334 {
5335         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5336 }
5337
5338 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5339 {
5340         u64 wptr;
5341
5342         /* XXX check if swapping is necessary on BE */
5343         if (ring->use_doorbell)
5344                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5345         else
5346                 BUG();
5347         return wptr;
5348 }
5349
5350 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5351 {
5352         struct amdgpu_device *adev = ring->adev;
5353
5354         /* XXX check if swapping is necessary on BE */
5355         if (ring->use_doorbell) {
5356                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5357                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5358         } else{
5359                 BUG(); /* only DOORBELL method supported on gfx9 now */
5360         }
5361 }
5362
5363 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5364                                          u64 seq, unsigned int flags)
5365 {
5366         struct amdgpu_device *adev = ring->adev;
5367
5368         /* we only allocate 32bit for each seq wb address */
5369         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5370
5371         /* write fence seq to the "addr" */
5372         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5373         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5374                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5375         amdgpu_ring_write(ring, lower_32_bits(addr));
5376         amdgpu_ring_write(ring, upper_32_bits(addr));
5377         amdgpu_ring_write(ring, lower_32_bits(seq));
5378
5379         if (flags & AMDGPU_FENCE_FLAG_INT) {
5380                 /* set register to trigger INT */
5381                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5382                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5383                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5384                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5385                 amdgpu_ring_write(ring, 0);
5386                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5387         }
5388 }
5389
5390 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5391 {
5392         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5393         amdgpu_ring_write(ring, 0);
5394 }
5395
5396 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5397 {
5398         struct v9_ce_ib_state ce_payload = {0};
5399         uint64_t csa_addr;
5400         int cnt;
5401
5402         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5403         csa_addr = amdgpu_csa_vaddr(ring->adev);
5404
5405         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5406         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5407                                  WRITE_DATA_DST_SEL(8) |
5408                                  WR_CONFIRM) |
5409                                  WRITE_DATA_CACHE_POLICY(0));
5410         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5411         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5412         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5413 }
5414
5415 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5416 {
5417         struct v9_de_ib_state de_payload = {0};
5418         uint64_t csa_addr, gds_addr;
5419         int cnt;
5420
5421         csa_addr = amdgpu_csa_vaddr(ring->adev);
5422         gds_addr = csa_addr + 4096;
5423         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5424         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5425
5426         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5427         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5428         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5429                                  WRITE_DATA_DST_SEL(8) |
5430                                  WR_CONFIRM) |
5431                                  WRITE_DATA_CACHE_POLICY(0));
5432         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5433         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5434         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5435 }
5436
5437 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5438                                    bool secure)
5439 {
5440         uint32_t v = secure ? FRAME_TMZ : 0;
5441
5442         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5443         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5444 }
5445
5446 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5447 {
5448         uint32_t dw2 = 0;
5449
5450         if (amdgpu_sriov_vf(ring->adev))
5451                 gfx_v9_0_ring_emit_ce_meta(ring);
5452
5453         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5454         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5455                 /* set load_global_config & load_global_uconfig */
5456                 dw2 |= 0x8001;
5457                 /* set load_cs_sh_regs */
5458                 dw2 |= 0x01000000;
5459                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5460                 dw2 |= 0x10002;
5461
5462                 /* set load_ce_ram if preamble presented */
5463                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5464                         dw2 |= 0x10000000;
5465         } else {
5466                 /* still load_ce_ram if this is the first time preamble presented
5467                  * although there is no context switch happens.
5468                  */
5469                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5470                         dw2 |= 0x10000000;
5471         }
5472
5473         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5474         amdgpu_ring_write(ring, dw2);
5475         amdgpu_ring_write(ring, 0);
5476 }
5477
5478 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5479 {
5480         unsigned ret;
5481         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5482         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5483         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5484         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5485         ret = ring->wptr & ring->buf_mask;
5486         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5487         return ret;
5488 }
5489
5490 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5491 {
5492         unsigned cur;
5493         BUG_ON(offset > ring->buf_mask);
5494         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5495
5496         cur = (ring->wptr & ring->buf_mask) - 1;
5497         if (likely(cur > offset))
5498                 ring->ring[offset] = cur - offset;
5499         else
5500                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5501 }
5502
5503 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5504                                     uint32_t reg_val_offs)
5505 {
5506         struct amdgpu_device *adev = ring->adev;
5507
5508         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5509         amdgpu_ring_write(ring, 0 |     /* src: register*/
5510                                 (5 << 8) |      /* dst: memory */
5511                                 (1 << 20));     /* write confirm */
5512         amdgpu_ring_write(ring, reg);
5513         amdgpu_ring_write(ring, 0);
5514         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5515                                 reg_val_offs * 4));
5516         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5517                                 reg_val_offs * 4));
5518 }
5519
5520 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5521                                     uint32_t val)
5522 {
5523         uint32_t cmd = 0;
5524
5525         switch (ring->funcs->type) {
5526         case AMDGPU_RING_TYPE_GFX:
5527                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5528                 break;
5529         case AMDGPU_RING_TYPE_KIQ:
5530                 cmd = (1 << 16); /* no inc addr */
5531                 break;
5532         default:
5533                 cmd = WR_CONFIRM;
5534                 break;
5535         }
5536         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5537         amdgpu_ring_write(ring, cmd);
5538         amdgpu_ring_write(ring, reg);
5539         amdgpu_ring_write(ring, 0);
5540         amdgpu_ring_write(ring, val);
5541 }
5542
5543 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5544                                         uint32_t val, uint32_t mask)
5545 {
5546         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5547 }
5548
5549 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5550                                                   uint32_t reg0, uint32_t reg1,
5551                                                   uint32_t ref, uint32_t mask)
5552 {
5553         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5554         struct amdgpu_device *adev = ring->adev;
5555         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5556                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5557
5558         if (fw_version_ok)
5559                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5560                                       ref, mask, 0x20);
5561         else
5562                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5563                                                            ref, mask);
5564 }
5565
5566 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5567 {
5568         struct amdgpu_device *adev = ring->adev;
5569         uint32_t value = 0;
5570
5571         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5572         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5573         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5574         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5575         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5576 }
5577
5578 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5579                                                  enum amdgpu_interrupt_state state)
5580 {
5581         switch (state) {
5582         case AMDGPU_IRQ_STATE_DISABLE:
5583         case AMDGPU_IRQ_STATE_ENABLE:
5584                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5585                                TIME_STAMP_INT_ENABLE,
5586                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5587                 break;
5588         default:
5589                 break;
5590         }
5591 }
5592
5593 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5594                                                      int me, int pipe,
5595                                                      enum amdgpu_interrupt_state state)
5596 {
5597         u32 mec_int_cntl, mec_int_cntl_reg;
5598
5599         /*
5600          * amdgpu controls only the first MEC. That's why this function only
5601          * handles the setting of interrupts for this specific MEC. All other
5602          * pipes' interrupts are set by amdkfd.
5603          */
5604
5605         if (me == 1) {
5606                 switch (pipe) {
5607                 case 0:
5608                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5609                         break;
5610                 case 1:
5611                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5612                         break;
5613                 case 2:
5614                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5615                         break;
5616                 case 3:
5617                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5618                         break;
5619                 default:
5620                         DRM_DEBUG("invalid pipe %d\n", pipe);
5621                         return;
5622                 }
5623         } else {
5624                 DRM_DEBUG("invalid me %d\n", me);
5625                 return;
5626         }
5627
5628         switch (state) {
5629         case AMDGPU_IRQ_STATE_DISABLE:
5630                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5631                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5632                                              TIME_STAMP_INT_ENABLE, 0);
5633                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5634                 break;
5635         case AMDGPU_IRQ_STATE_ENABLE:
5636                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5637                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5638                                              TIME_STAMP_INT_ENABLE, 1);
5639                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5640                 break;
5641         default:
5642                 break;
5643         }
5644 }
5645
5646 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5647                                              struct amdgpu_irq_src *source,
5648                                              unsigned type,
5649                                              enum amdgpu_interrupt_state state)
5650 {
5651         switch (state) {
5652         case AMDGPU_IRQ_STATE_DISABLE:
5653         case AMDGPU_IRQ_STATE_ENABLE:
5654                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5655                                PRIV_REG_INT_ENABLE,
5656                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5657                 break;
5658         default:
5659                 break;
5660         }
5661
5662         return 0;
5663 }
5664
5665 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5666                                               struct amdgpu_irq_src *source,
5667                                               unsigned type,
5668                                               enum amdgpu_interrupt_state state)
5669 {
5670         switch (state) {
5671         case AMDGPU_IRQ_STATE_DISABLE:
5672         case AMDGPU_IRQ_STATE_ENABLE:
5673                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5674                                PRIV_INSTR_INT_ENABLE,
5675                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5676         default:
5677                 break;
5678         }
5679
5680         return 0;
5681 }
5682
5683 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5684         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5685                         CP_ECC_ERROR_INT_ENABLE, 1)
5686
5687 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5688         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5689                         CP_ECC_ERROR_INT_ENABLE, 0)
5690
5691 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5692                                               struct amdgpu_irq_src *source,
5693                                               unsigned type,
5694                                               enum amdgpu_interrupt_state state)
5695 {
5696         switch (state) {
5697         case AMDGPU_IRQ_STATE_DISABLE:
5698                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5699                                 CP_ECC_ERROR_INT_ENABLE, 0);
5700                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5701                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5702                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5703                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5704                 break;
5705
5706         case AMDGPU_IRQ_STATE_ENABLE:
5707                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5708                                 CP_ECC_ERROR_INT_ENABLE, 1);
5709                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5710                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5711                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5712                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5713                 break;
5714         default:
5715                 break;
5716         }
5717
5718         return 0;
5719 }
5720
5721
5722 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5723                                             struct amdgpu_irq_src *src,
5724                                             unsigned type,
5725                                             enum amdgpu_interrupt_state state)
5726 {
5727         switch (type) {
5728         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5729                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5730                 break;
5731         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5732                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5733                 break;
5734         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5735                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5736                 break;
5737         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5738                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5739                 break;
5740         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5741                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5742                 break;
5743         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5744                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5745                 break;
5746         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5747                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5748                 break;
5749         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5750                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5751                 break;
5752         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5753                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5754                 break;
5755         default:
5756                 break;
5757         }
5758         return 0;
5759 }
5760
5761 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5762                             struct amdgpu_irq_src *source,
5763                             struct amdgpu_iv_entry *entry)
5764 {
5765         int i;
5766         u8 me_id, pipe_id, queue_id;
5767         struct amdgpu_ring *ring;
5768
5769         DRM_DEBUG("IH: CP EOP\n");
5770         me_id = (entry->ring_id & 0x0c) >> 2;
5771         pipe_id = (entry->ring_id & 0x03) >> 0;
5772         queue_id = (entry->ring_id & 0x70) >> 4;
5773
5774         switch (me_id) {
5775         case 0:
5776                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5777                 break;
5778         case 1:
5779         case 2:
5780                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5781                         ring = &adev->gfx.compute_ring[i];
5782                         /* Per-queue interrupt is supported for MEC starting from VI.
5783                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
5784                           */
5785                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5786                                 amdgpu_fence_process(ring);
5787                 }
5788                 break;
5789         }
5790         return 0;
5791 }
5792
5793 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5794                            struct amdgpu_iv_entry *entry)
5795 {
5796         u8 me_id, pipe_id, queue_id;
5797         struct amdgpu_ring *ring;
5798         int i;
5799
5800         me_id = (entry->ring_id & 0x0c) >> 2;
5801         pipe_id = (entry->ring_id & 0x03) >> 0;
5802         queue_id = (entry->ring_id & 0x70) >> 4;
5803
5804         switch (me_id) {
5805         case 0:
5806                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5807                 break;
5808         case 1:
5809         case 2:
5810                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5811                         ring = &adev->gfx.compute_ring[i];
5812                         if (ring->me == me_id && ring->pipe == pipe_id &&
5813                             ring->queue == queue_id)
5814                                 drm_sched_fault(&ring->sched);
5815                 }
5816                 break;
5817         }
5818 }
5819
5820 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5821                                  struct amdgpu_irq_src *source,
5822                                  struct amdgpu_iv_entry *entry)
5823 {
5824         DRM_ERROR("Illegal register access in command stream\n");
5825         gfx_v9_0_fault(adev, entry);
5826         return 0;
5827 }
5828
5829 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5830                                   struct amdgpu_irq_src *source,
5831                                   struct amdgpu_iv_entry *entry)
5832 {
5833         DRM_ERROR("Illegal instruction in command stream\n");
5834         gfx_v9_0_fault(adev, entry);
5835         return 0;
5836 }
5837
5838
5839 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5840         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5841           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5842           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5843         },
5844         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5845           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5846           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5847         },
5848         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5849           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5850           0, 0
5851         },
5852         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5853           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5854           0, 0
5855         },
5856         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5857           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5858           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5859         },
5860         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5861           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5862           0, 0
5863         },
5864         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5865           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5866           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5867         },
5868         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5869           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5870           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5871         },
5872         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5873           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5874           0, 0
5875         },
5876         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5877           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5878           0, 0
5879         },
5880         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5881           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5882           0, 0
5883         },
5884         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5885           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5886           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5887         },
5888         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5889           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5890           0, 0
5891         },
5892         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5893           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5894           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5895         },
5896         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5897           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5898           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5899           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5900         },
5901         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5902           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5903           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5904           0, 0
5905         },
5906         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5907           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5908           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5909           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5910         },
5911         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5912           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5913           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5914           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5915         },
5916         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5917           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5918           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5919           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5920         },
5921         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5922           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5923           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5924           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5925         },
5926         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5927           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5928           0, 0
5929         },
5930         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5931           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5932           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5933         },
5934         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5935           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5936           0, 0
5937         },
5938         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5939           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5940           0, 0
5941         },
5942         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5943           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5944           0, 0
5945         },
5946         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5947           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5948           0, 0
5949         },
5950         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5951           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5952           0, 0
5953         },
5954         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5955           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5956           0, 0
5957         },
5958         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5959           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5960           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5961         },
5962         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5963           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5964           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5965         },
5966         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5967           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5968           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5969         },
5970         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5971           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5972           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5973         },
5974         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5975           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5976           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5977         },
5978         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5979           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5980           0, 0
5981         },
5982         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5983           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5984           0, 0
5985         },
5986         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5987           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5988           0, 0
5989         },
5990         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5991           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5992           0, 0
5993         },
5994         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5995           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5996           0, 0
5997         },
5998         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5999           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6000           0, 0
6001         },
6002         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6003           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6004           0, 0
6005         },
6006         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6007           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6008           0, 0
6009         },
6010         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6011           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6012           0, 0
6013         },
6014         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6015           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6016           0, 0
6017         },
6018         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6019           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6020           0, 0
6021         },
6022         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6023           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6024           0, 0
6025         },
6026         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6027           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6028           0, 0
6029         },
6030         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6031           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6032           0, 0
6033         },
6034         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6035           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6036           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6037         },
6038         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6039           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6040           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6041         },
6042         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6043           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6044           0, 0
6045         },
6046         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6047           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6048           0, 0
6049         },
6050         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6051           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6052           0, 0
6053         },
6054         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6055           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6056           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6057         },
6058         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6059           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6060           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6061         },
6062         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6063           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6064           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6065         },
6066         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6067           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6068           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6069         },
6070         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6071           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6072           0, 0
6073         },
6074         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6075           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6076           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6077         },
6078         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6079           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6080           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6081         },
6082         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6083           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6084           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6085         },
6086         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6087           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6088           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6089         },
6090         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6091           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6092           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6093         },
6094         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6095           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6096           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6097         },
6098         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6099           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6100           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6101         },
6102         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6103           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6104           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6105         },
6106         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6107           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6108           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6109         },
6110         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6111           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6112           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6113         },
6114         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6115           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6116           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6117         },
6118         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6119           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6120           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6121         },
6122         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6123           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6124           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6125         },
6126         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6127           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6128           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6129         },
6130         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6131           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6132           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6133         },
6134         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6135           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6136           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6137         },
6138         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6139           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6140           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6141         },
6142         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6143           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6144           0, 0
6145         },
6146         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6147           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6148           0, 0
6149         },
6150         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6151           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6152           0, 0
6153         },
6154         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6155           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6156           0, 0
6157         },
6158         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6159           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6160           0, 0
6161         },
6162         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6163           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6164           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6165         },
6166         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6167           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6168           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6169         },
6170         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6171           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6172           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6173         },
6174         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6175           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6176           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6177         },
6178         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6179           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6180           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6181         },
6182         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6183           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6184           0, 0
6185         },
6186         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6187           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6188           0, 0
6189         },
6190         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6191           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6192           0, 0
6193         },
6194         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6195           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6196           0, 0
6197         },
6198         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6199           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6200           0, 0
6201         },
6202         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6203           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6204           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6205         },
6206         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6207           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6208           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6209         },
6210         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6211           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6212           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6213         },
6214         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6215           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6216           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6217         },
6218         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6219           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6220           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6221         },
6222         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6223           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6224           0, 0
6225         },
6226         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6227           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6228           0, 0
6229         },
6230         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6231           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6232           0, 0
6233         },
6234         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6235           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6236           0, 0
6237         },
6238         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6239           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6240           0, 0
6241         },
6242         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6243           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6244           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6245         },
6246         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6247           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6248           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6249         },
6250         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6251           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6252           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6253         },
6254         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6255           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6256           0, 0
6257         },
6258         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6259           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6260           0, 0
6261         },
6262         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6263           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6264           0, 0
6265         },
6266         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6267           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6268           0, 0
6269         },
6270         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6271           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6272           0, 0
6273         },
6274         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6275           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6276           0, 0
6277         }
6278 };
6279
6280 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6281                                      void *inject_if)
6282 {
6283         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6284         int ret;
6285         struct ta_ras_trigger_error_input block_info = { 0 };
6286
6287         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6288                 return -EINVAL;
6289
6290         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6291                 return -EINVAL;
6292
6293         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6294                 return -EPERM;
6295
6296         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6297               info->head.type)) {
6298                 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6299                         ras_gfx_subblocks[info->head.sub_block_index].name,
6300                         info->head.type);
6301                 return -EPERM;
6302         }
6303
6304         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6305               info->head.type)) {
6306                 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6307                         ras_gfx_subblocks[info->head.sub_block_index].name,
6308                         info->head.type);
6309                 return -EPERM;
6310         }
6311
6312         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6313         block_info.sub_block_index =
6314                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6315         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6316         block_info.address = info->address;
6317         block_info.value = info->value;
6318
6319         mutex_lock(&adev->grbm_idx_mutex);
6320         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6321         mutex_unlock(&adev->grbm_idx_mutex);
6322
6323         return ret;
6324 }
6325
6326 static const char *vml2_mems[] = {
6327         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6328         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6329         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6330         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6331         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6332         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6333         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6334         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6335         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6336         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6337         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6338         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6339         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6340         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6341         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6342         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6343 };
6344
6345 static const char *vml2_walker_mems[] = {
6346         "UTC_VML2_CACHE_PDE0_MEM0",
6347         "UTC_VML2_CACHE_PDE0_MEM1",
6348         "UTC_VML2_CACHE_PDE1_MEM0",
6349         "UTC_VML2_CACHE_PDE1_MEM1",
6350         "UTC_VML2_CACHE_PDE2_MEM0",
6351         "UTC_VML2_CACHE_PDE2_MEM1",
6352         "UTC_VML2_RDIF_LOG_FIFO",
6353 };
6354
6355 static const char *atc_l2_cache_2m_mems[] = {
6356         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6357         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6358         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6359         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6360 };
6361
6362 static const char *atc_l2_cache_4k_mems[] = {
6363         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6364         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6365         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6366         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6367         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6368         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6369         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6370         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6371         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6372         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6373         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6374         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6375         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6376         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6377         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6378         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6379         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6380         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6381         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6382         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6383         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6384         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6385         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6386         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6387         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6388         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6389         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6390         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6391         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6392         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6393         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6394         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6395 };
6396
6397 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6398                                          struct ras_err_data *err_data)
6399 {
6400         uint32_t i, data;
6401         uint32_t sec_count, ded_count;
6402
6403         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6404         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6405         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6406         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6407         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6408         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6409         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6410         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6411
6412         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6413                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6414                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6415
6416                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6417                 if (sec_count) {
6418                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6419                                 "SEC %d\n", i, vml2_mems[i], sec_count);
6420                         err_data->ce_count += sec_count;
6421                 }
6422
6423                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6424                 if (ded_count) {
6425                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6426                                 "DED %d\n", i, vml2_mems[i], ded_count);
6427                         err_data->ue_count += ded_count;
6428                 }
6429         }
6430
6431         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6432                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6433                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6434
6435                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6436                                                 SEC_COUNT);
6437                 if (sec_count) {
6438                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6439                                 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6440                         err_data->ce_count += sec_count;
6441                 }
6442
6443                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6444                                                 DED_COUNT);
6445                 if (ded_count) {
6446                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6447                                 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6448                         err_data->ue_count += ded_count;
6449                 }
6450         }
6451
6452         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6453                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6454                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6455
6456                 sec_count = (data & 0x00006000L) >> 0xd;
6457                 if (sec_count) {
6458                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6459                                 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6460                                 sec_count);
6461                         err_data->ce_count += sec_count;
6462                 }
6463         }
6464
6465         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6466                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6467                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6468
6469                 sec_count = (data & 0x00006000L) >> 0xd;
6470                 if (sec_count) {
6471                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6472                                 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6473                                 sec_count);
6474                         err_data->ce_count += sec_count;
6475                 }
6476
6477                 ded_count = (data & 0x00018000L) >> 0xf;
6478                 if (ded_count) {
6479                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6480                                 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6481                                 ded_count);
6482                         err_data->ue_count += ded_count;
6483                 }
6484         }
6485
6486         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6487         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6488         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6489         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6490
6491         return 0;
6492 }
6493
6494 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6495         const struct soc15_reg_entry *reg,
6496         uint32_t se_id, uint32_t inst_id, uint32_t value,
6497         uint32_t *sec_count, uint32_t *ded_count)
6498 {
6499         uint32_t i;
6500         uint32_t sec_cnt, ded_cnt;
6501
6502         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6503                 if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6504                         gfx_v9_0_ras_fields[i].seg != reg->seg ||
6505                         gfx_v9_0_ras_fields[i].inst != reg->inst)
6506                         continue;
6507
6508                 sec_cnt = (value &
6509                                 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6510                                 gfx_v9_0_ras_fields[i].sec_count_shift;
6511                 if (sec_cnt) {
6512                         dev_info(adev->dev, "GFX SubBlock %s, "
6513                                 "Instance[%d][%d], SEC %d\n",
6514                                 gfx_v9_0_ras_fields[i].name,
6515                                 se_id, inst_id,
6516                                 sec_cnt);
6517                         *sec_count += sec_cnt;
6518                 }
6519
6520                 ded_cnt = (value &
6521                                 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6522                                 gfx_v9_0_ras_fields[i].ded_count_shift;
6523                 if (ded_cnt) {
6524                         dev_info(adev->dev, "GFX SubBlock %s, "
6525                                 "Instance[%d][%d], DED %d\n",
6526                                 gfx_v9_0_ras_fields[i].name,
6527                                 se_id, inst_id,
6528                                 ded_cnt);
6529                         *ded_count += ded_cnt;
6530                 }
6531         }
6532
6533         return 0;
6534 }
6535
6536 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6537 {
6538         int i, j, k;
6539
6540         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6541                 return;
6542
6543         /* read back registers to clear the counters */
6544         mutex_lock(&adev->grbm_idx_mutex);
6545         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6546                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6547                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6548                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6549                                 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6550                         }
6551                 }
6552         }
6553         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6554         mutex_unlock(&adev->grbm_idx_mutex);
6555
6556         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6557         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6558         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6559         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6560         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6561         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6562         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6563         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6564
6565         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6566                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6567                 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6568         }
6569
6570         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6571                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6572                 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6573         }
6574
6575         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6576                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6577                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6578         }
6579
6580         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6581                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6582                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6583         }
6584
6585         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6586         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6587         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6588         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6589 }
6590
6591 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6592                                           void *ras_error_status)
6593 {
6594         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6595         uint32_t sec_count = 0, ded_count = 0;
6596         uint32_t i, j, k;
6597         uint32_t reg_value;
6598
6599         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6600                 return -EINVAL;
6601
6602         err_data->ue_count = 0;
6603         err_data->ce_count = 0;
6604
6605         mutex_lock(&adev->grbm_idx_mutex);
6606
6607         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6608                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6609                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6610                                 gfx_v9_0_select_se_sh(adev, j, 0, k);
6611                                 reg_value =
6612                                         RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6613                                 if (reg_value)
6614                                         gfx_v9_0_ras_error_count(adev,
6615                                                 &gfx_v9_0_edc_counter_regs[i],
6616                                                 j, k, reg_value,
6617                                                 &sec_count, &ded_count);
6618                         }
6619                 }
6620         }
6621
6622         err_data->ce_count += sec_count;
6623         err_data->ue_count += ded_count;
6624
6625         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6626         mutex_unlock(&adev->grbm_idx_mutex);
6627
6628         gfx_v9_0_query_utc_edc_status(adev, err_data);
6629
6630         return 0;
6631 }
6632
6633 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6634 {
6635         const unsigned int cp_coher_cntl =
6636                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6637                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6638                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6639                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6640                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6641
6642         /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
6643         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6644         amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6645         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6646         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6647         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6648         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6649         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6650 }
6651
6652 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6653         .name = "gfx_v9_0",
6654         .early_init = gfx_v9_0_early_init,
6655         .late_init = gfx_v9_0_late_init,
6656         .sw_init = gfx_v9_0_sw_init,
6657         .sw_fini = gfx_v9_0_sw_fini,
6658         .hw_init = gfx_v9_0_hw_init,
6659         .hw_fini = gfx_v9_0_hw_fini,
6660         .suspend = gfx_v9_0_suspend,
6661         .resume = gfx_v9_0_resume,
6662         .is_idle = gfx_v9_0_is_idle,
6663         .wait_for_idle = gfx_v9_0_wait_for_idle,
6664         .soft_reset = gfx_v9_0_soft_reset,
6665         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6666         .set_powergating_state = gfx_v9_0_set_powergating_state,
6667         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6668 };
6669
6670 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6671         .type = AMDGPU_RING_TYPE_GFX,
6672         .align_mask = 0xff,
6673         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6674         .support_64bit_ptrs = true,
6675         .vmhub = AMDGPU_GFXHUB_0,
6676         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6677         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6678         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6679         .emit_frame_size = /* totally 242 maximum if 16 IBs */
6680                 5 +  /* COND_EXEC */
6681                 7 +  /* PIPELINE_SYNC */
6682                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6683                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6684                 2 + /* VM_FLUSH */
6685                 8 +  /* FENCE for VM_FLUSH */
6686                 20 + /* GDS switch */
6687                 4 + /* double SWITCH_BUFFER,
6688                        the first COND_EXEC jump to the place just
6689                            prior to this double SWITCH_BUFFER  */
6690                 5 + /* COND_EXEC */
6691                 7 +      /*     HDP_flush */
6692                 4 +      /*     VGT_flush */
6693                 14 + /* CE_META */
6694                 31 + /* DE_META */
6695                 3 + /* CNTX_CTRL */
6696                 5 + /* HDP_INVL */
6697                 8 + 8 + /* FENCE x2 */
6698                 2 + /* SWITCH_BUFFER */
6699                 7, /* gfx_v9_0_emit_mem_sync */
6700         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6701         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6702         .emit_fence = gfx_v9_0_ring_emit_fence,
6703         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6704         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6705         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6706         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6707         .test_ring = gfx_v9_0_ring_test_ring,
6708         .test_ib = gfx_v9_0_ring_test_ib,
6709         .insert_nop = amdgpu_ring_insert_nop,
6710         .pad_ib = amdgpu_ring_generic_pad_ib,
6711         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6712         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6713         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6714         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6715         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6716         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6717         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6718         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6719         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6720         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6721 };
6722
6723 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6724         .type = AMDGPU_RING_TYPE_COMPUTE,
6725         .align_mask = 0xff,
6726         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6727         .support_64bit_ptrs = true,
6728         .vmhub = AMDGPU_GFXHUB_0,
6729         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6730         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6731         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6732         .emit_frame_size =
6733                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6734                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6735                 5 + /* hdp invalidate */
6736                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6737                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6738                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6739                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6740                 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6741                 7, /* gfx_v9_0_emit_mem_sync */
6742         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6743         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6744         .emit_fence = gfx_v9_0_ring_emit_fence,
6745         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6746         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6747         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6748         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6749         .test_ring = gfx_v9_0_ring_test_ring,
6750         .test_ib = gfx_v9_0_ring_test_ib,
6751         .insert_nop = amdgpu_ring_insert_nop,
6752         .pad_ib = amdgpu_ring_generic_pad_ib,
6753         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6754         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6755         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6756         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6757 };
6758
6759 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6760         .type = AMDGPU_RING_TYPE_KIQ,
6761         .align_mask = 0xff,
6762         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6763         .support_64bit_ptrs = true,
6764         .vmhub = AMDGPU_GFXHUB_0,
6765         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6766         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6767         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6768         .emit_frame_size =
6769                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6770                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6771                 5 + /* hdp invalidate */
6772                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6773                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6774                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6775                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6776                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6777         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6778         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6779         .test_ring = gfx_v9_0_ring_test_ring,
6780         .insert_nop = amdgpu_ring_insert_nop,
6781         .pad_ib = amdgpu_ring_generic_pad_ib,
6782         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6783         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6784         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6785         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6786 };
6787
6788 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6789 {
6790         int i;
6791
6792         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6793
6794         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6795                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6796
6797         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6798                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6799 }
6800
6801 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6802         .set = gfx_v9_0_set_eop_interrupt_state,
6803         .process = gfx_v9_0_eop_irq,
6804 };
6805
6806 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6807         .set = gfx_v9_0_set_priv_reg_fault_state,
6808         .process = gfx_v9_0_priv_reg_irq,
6809 };
6810
6811 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6812         .set = gfx_v9_0_set_priv_inst_fault_state,
6813         .process = gfx_v9_0_priv_inst_irq,
6814 };
6815
6816 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6817         .set = gfx_v9_0_set_cp_ecc_error_state,
6818         .process = amdgpu_gfx_cp_ecc_error_irq,
6819 };
6820
6821
6822 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6823 {
6824         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6825         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6826
6827         adev->gfx.priv_reg_irq.num_types = 1;
6828         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6829
6830         adev->gfx.priv_inst_irq.num_types = 1;
6831         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6832
6833         adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
6834         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6835 }
6836
6837 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6838 {
6839         switch (adev->asic_type) {
6840         case CHIP_VEGA10:
6841         case CHIP_VEGA12:
6842         case CHIP_VEGA20:
6843         case CHIP_RAVEN:
6844         case CHIP_ARCTURUS:
6845         case CHIP_RENOIR:
6846                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6847                 break;
6848         default:
6849                 break;
6850         }
6851 }
6852
6853 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6854 {
6855         /* init asci gds info */
6856         switch (adev->asic_type) {
6857         case CHIP_VEGA10:
6858         case CHIP_VEGA12:
6859         case CHIP_VEGA20:
6860                 adev->gds.gds_size = 0x10000;
6861                 break;
6862         case CHIP_RAVEN:
6863         case CHIP_ARCTURUS:
6864                 adev->gds.gds_size = 0x1000;
6865                 break;
6866         default:
6867                 adev->gds.gds_size = 0x10000;
6868                 break;
6869         }
6870
6871         switch (adev->asic_type) {
6872         case CHIP_VEGA10:
6873         case CHIP_VEGA20:
6874                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6875                 break;
6876         case CHIP_VEGA12:
6877                 adev->gds.gds_compute_max_wave_id = 0x27f;
6878                 break;
6879         case CHIP_RAVEN:
6880                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
6881                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6882                 else
6883                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6884                 break;
6885         case CHIP_ARCTURUS:
6886                 adev->gds.gds_compute_max_wave_id = 0xfff;
6887                 break;
6888         default:
6889                 /* this really depends on the chip */
6890                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6891                 break;
6892         }
6893
6894         adev->gds.gws_size = 64;
6895         adev->gds.oa_size = 16;
6896 }
6897
6898 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6899                                                  u32 bitmap)
6900 {
6901         u32 data;
6902
6903         if (!bitmap)
6904                 return;
6905
6906         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6907         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6908
6909         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6910 }
6911
6912 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6913 {
6914         u32 data, mask;
6915
6916         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6917         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6918
6919         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6920         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6921
6922         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6923
6924         return (~data) & mask;
6925 }
6926
6927 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6928                                  struct amdgpu_cu_info *cu_info)
6929 {
6930         int i, j, k, counter, active_cu_number = 0;
6931         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6932         unsigned disable_masks[4 * 4];
6933
6934         if (!adev || !cu_info)
6935                 return -EINVAL;
6936
6937         /*
6938          * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
6939          */
6940         if (adev->gfx.config.max_shader_engines *
6941                 adev->gfx.config.max_sh_per_se > 16)
6942                 return -EINVAL;
6943
6944         amdgpu_gfx_parse_disable_cu(disable_masks,
6945                                     adev->gfx.config.max_shader_engines,
6946                                     adev->gfx.config.max_sh_per_se);
6947
6948         mutex_lock(&adev->grbm_idx_mutex);
6949         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6950                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6951                         mask = 1;
6952                         ao_bitmap = 0;
6953                         counter = 0;
6954                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6955                         gfx_v9_0_set_user_cu_inactive_bitmap(
6956                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6957                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6958
6959                         /*
6960                          * The bitmap(and ao_cu_bitmap) in cu_info structure is
6961                          * 4x4 size array, and it's usually suitable for Vega
6962                          * ASICs which has 4*2 SE/SH layout.
6963                          * But for Arcturus, SE/SH layout is changed to 8*1.
6964                          * To mostly reduce the impact, we make it compatible
6965                          * with current bitmap array as below:
6966                          *    SE4,SH0 --> bitmap[0][1]
6967                          *    SE5,SH0 --> bitmap[1][1]
6968                          *    SE6,SH0 --> bitmap[2][1]
6969                          *    SE7,SH0 --> bitmap[3][1]
6970                          */
6971                         cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6972
6973                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
6974                                 if (bitmap & mask) {
6975                                         if (counter < adev->gfx.config.max_cu_per_sh)
6976                                                 ao_bitmap |= mask;
6977                                         counter ++;
6978                                 }
6979                                 mask <<= 1;
6980                         }
6981                         active_cu_number += counter;
6982                         if (i < 2 && j < 2)
6983                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6984                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6985                 }
6986         }
6987         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6988         mutex_unlock(&adev->grbm_idx_mutex);
6989
6990         cu_info->number = active_cu_number;
6991         cu_info->ao_cu_mask = ao_cu_mask;
6992         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6993
6994         return 0;
6995 }
6996
6997 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6998 {
6999         .type = AMD_IP_BLOCK_TYPE_GFX,
7000         .major = 9,
7001         .minor = 0,
7002         .rev = 0,
7003         .funcs = &gfx_v9_0_ip_funcs,
7004 };