/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "amdgpu_ring_mux.h"
#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_NUM_SW_GFX_RINGS  2
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");

#define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0

#define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1

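/*
 * GFX sub-block indices as numbered by the RAS TA firmware.  The values below
 * mirror the TA-side enumeration so a driver-side sub-block can be translated
 * to the index the TA expects (presumably for error injection and queries).
 */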
enum ta_ras_gfx_subblock {
        /*CPC*/
        TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
        TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
        TA_RAS_BLOCK__GFX_CPC_UCODE,
        TA_RAS_BLOCK__GFX_DC_STATE_ME1,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
        TA_RAS_BLOCK__GFX_DC_STATE_ME2,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        /* CPF*/
        TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
        TA_RAS_BLOCK__GFX_CPF_TAG,
        TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
        /* CPG*/
        TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
        TA_RAS_BLOCK__GFX_CPG_TAG,
        TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
        /* GDS*/
        TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        /* SPI*/
        TA_RAS_BLOCK__GFX_SPI_SR_MEM,
        /* SQ*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_LDS_D,
        TA_RAS_BLOCK__GFX_SQ_LDS_I,
        TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
        /* SQC (3 ranges)*/
        TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        /* SQC range 0*/
        TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
                TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        /* SQC range 1*/
        TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        /* SQC range 2*/
        TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
        /* TA*/
        TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
        TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        /* TCA*/
        TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        /* TCC (5 sub-ranges)*/
        TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        /* TCC range 0*/
        TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
        TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        /* TCC range 1*/
        TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
                TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        /* TCC range 2*/
        TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
        TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
        TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
                TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        /* TCC range 3*/
        TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
                TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        /* TCC range 4*/
        TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
                TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
                TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
        /* TCI*/
        TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
        /* TCP*/
        TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
        TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
        TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
        TA_RAS_BLOCK__GFX_TCP_DB_RAM,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        /* TD*/
        TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
        TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        /* EA (3 sub-ranges)*/
        TA_RAS_BLOCK__GFX_EA_INDEX_START,
        /* EA range 0*/
        TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        /* EA range 1*/
        TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        /* EA range 2*/
        TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
        /* UTC VM L2 bank*/
        TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
        /* UTC VM walker*/
        TA_RAS_BLOCK__UTC_VML2_WALKER,
        /* UTC ATC L2 2MB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
        /* UTC ATC L2 4KB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
        TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
        unsigned char *name;
        int ta_subblock;
        int hw_supported_error_type;
        int sw_supported_error_type;
};

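/*
 * Build one ras_gfx_subblock entry.  Flags a..d are packed into
 * hw_supported_error_type and e..h into sw_supported_error_type; the bit
 * positions are assumed to follow enum amdgpu_ras_error_type (parity,
 * single-correctable, multi-uncorrectable, poison).
 */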
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
        [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
                #subblock,                                                     \
                TA_RAS_BLOCK__##subblock,                                      \
                ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
                (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
        }

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

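/*
 * The golden settings below are (register, AND mask, OR value) triples.
 * soc15_program_register_sequence() clears the masked bits and ORs in the new
 * value for each entry (a full 0xffffffff mask overwrites the register).
 */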
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
        {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
        {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

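/*
 * Offsets of the eight RLC_SRM_INDEX_CNTL address/data registers relative to
 * instance 0, so the save/restore list index control can be programmed in a
 * loop.
 */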
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
                                          void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
                                     void *inject_if);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);

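/*
 * KIQ (kernel interface queue) PM4 packet builders.  These emit the
 * SET_RESOURCES, MAP_QUEUES, UNMAP_QUEUES, QUERY_STATUS and INVALIDATE_TLBS
 * packets the KIQ uses to manage compute queues on behalf of the driver.
 */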
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
                                uint64_t queue_mask)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring,
                PACKET3_SET_RESOURCES_VMID_MASK(0) |
                /* vmid_mask:0, queue_type:0 (KIQ) */
                PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
        amdgpu_ring_write(kiq_ring,
                        lower_32_bits(queue_mask));     /* queue mask lo */
        amdgpu_ring_write(kiq_ring,
                        upper_32_bits(queue_mask));     /* queue mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}

static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
                                 struct amdgpu_ring *ring)
{
        uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
        uint64_t wptr_addr = ring->wptr_gpu_addr;
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
        /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
        amdgpu_ring_write(kiq_ring,
                         PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
                         PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
                         PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                         PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                         PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
                         /* queue_type: normal compute queue */
                         PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
                         /* alloc format: all_on_one_pipe */
                         PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
                         PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
                         /* num_queues: must be 1 */
                         PACKET3_MAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
        amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   enum amdgpu_unmap_queues_action action,
                                   u64 gpu_addr, u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
        amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
                          PACKET3_UNMAP_QUEUES_ACTION(action) |
                          PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
                          PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
                          PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

        if (action == PREEMPT_QUEUES_NO_UNMAP) {
                amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);

        } else {
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
        }
}

static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   u64 addr,
                                   u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
        amdgpu_ring_write(kiq_ring,
                          PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
                          PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
                          PACKET3_QUERY_STATUS_COMMAND(2));
        /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
        amdgpu_ring_write(kiq_ring,
                        PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
                        PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
        amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
        amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
                                uint16_t pasid, uint32_t flush_type,
                                bool all_hub)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
                        PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
                        PACKET3_INVALIDATE_TLBS_PASID(pasid) |
                        PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

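/*
 * The packet sizes below are in dwords and include the PACKET3 header,
 * matching the number of amdgpu_ring_write() calls in each builder above.
 */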
static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
        .kiq_set_resources = gfx_v9_0_kiq_set_resources,
        .kiq_map_queues = gfx_v9_0_kiq_map_queues,
        .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
        .kiq_query_status = gfx_v9_0_kiq_query_status,
        .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
        .set_resources_size = 8,
        .map_queues_size = 7,
        .unmap_queues_size = 6,
        .query_status_size = 7,
        .invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
        adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
}

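/*
 * Program the per-ASIC golden register settings for the detected GC IP
 * version; apart from Renoir and the 9.4.1/9.4.2 parts, the common gc_9_x
 * settings are applied afterwards as well.
 */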
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->ip_versions[GC_HWIP][0]) {
        case IP_VERSION(9, 0, 1):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg10,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg10));
                break;
        case IP_VERSION(9, 2, 1):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1_vg12,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
                break;
        case IP_VERSION(9, 4, 0):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg20,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg20));
                break;
        case IP_VERSION(9, 4, 1):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_4_1_arct,
                                                ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
                break;
        case IP_VERSION(9, 2, 2):
        case IP_VERSION(9, 1, 0):
                soc15_program_register_sequence(adev, golden_settings_gc_9_1,
                                                ARRAY_SIZE(golden_settings_gc_9_1));
                if (adev->apu_flags & AMD_APU_IS_RAVEN2)
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv2,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv2));
                else
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv1,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv1));
                break;
        case IP_VERSION(9, 3, 0):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_1_rn,
                                                ARRAY_SIZE(golden_settings_gc_9_1_rn));
                return; /* Renoir does not need the common golden settings */
        case IP_VERSION(9, 4, 2):
                gfx_v9_4_2_init_golden_registers(adev,
                                                 adev->smuio.funcs->get_die_id(adev));
                break;
        default:
                break;
        }

        if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
            (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
                soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
                                                (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

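/* Small helpers that emit WRITE_DATA and WAIT_REG_MEM packets on a ring. */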
958 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
959                                        bool wc, uint32_t reg, uint32_t val)
960 {
961         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
962         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
963                                 WRITE_DATA_DST_SEL(0) |
964                                 (wc ? WR_CONFIRM : 0));
965         amdgpu_ring_write(ring, reg);
966         amdgpu_ring_write(ring, 0);
967         amdgpu_ring_write(ring, val);
968 }
969
970 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
971                                   int mem_space, int opt, uint32_t addr0,
972                                   uint32_t addr1, uint32_t ref, uint32_t mask,
973                                   uint32_t inv)
974 {
975         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
976         amdgpu_ring_write(ring,
977                                  /* memory (1) or register (0) */
978                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
979                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
980                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
981                                  WAIT_REG_MEM_ENGINE(eng_sel)));
982
983         if (mem_space)
984                 BUG_ON(addr0 & 0x3); /* Dword align */
985         amdgpu_ring_write(ring, addr0);
986         amdgpu_ring_write(ring, addr1);
987         amdgpu_ring_write(ring, ref);
988         amdgpu_ring_write(ring, mask);
989         amdgpu_ring_write(ring, inv); /* poll interval */
990 }
991
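/*
 * Basic ring test: seed SCRATCH_REG0 with 0xCAFEDEAD, submit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, then poll the
 * register until the value lands or the timeout expires.
 */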
992 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
993 {
994         struct amdgpu_device *adev = ring->adev;
995         uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
996         uint32_t tmp = 0;
997         unsigned i;
998         int r;
999
1000         WREG32(scratch, 0xCAFEDEAD);
1001         r = amdgpu_ring_alloc(ring, 3);
1002         if (r)
1003                 return r;
1004
1005         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1006         amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1007         amdgpu_ring_write(ring, 0xDEADBEEF);
1008         amdgpu_ring_commit(ring);
1009
1010         for (i = 0; i < adev->usec_timeout; i++) {
1011                 tmp = RREG32(scratch);
1012                 if (tmp == 0xDEADBEEF)
1013                         break;
1014                 udelay(1);
1015         }
1016
1017         if (i >= adev->usec_timeout)
1018                 r = -ETIMEDOUT;
1019         return r;
1020 }
1021
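/*
 * Indirect buffer test: build a small IB that writes 0xDEADBEEF to a
 * writeback slot, schedule it on @ring, wait on its fence and verify
 * the value actually reached memory.
 */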
1022 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1023 {
1024         struct amdgpu_device *adev = ring->adev;
1025         struct amdgpu_ib ib;
1026         struct dma_fence *f = NULL;
1027
1028         unsigned index;
1029         uint64_t gpu_addr;
1030         uint32_t tmp;
1031         long r;
1032
1033         r = amdgpu_device_wb_get(adev, &index);
1034         if (r)
1035                 return r;
1036
1037         gpu_addr = adev->wb.gpu_addr + (index * 4);
1038         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1039         memset(&ib, 0, sizeof(ib));
1040         r = amdgpu_ib_get(adev, NULL, 16,
1041                                         AMDGPU_IB_POOL_DIRECT, &ib);
1042         if (r)
1043                 goto err1;
1044
1045         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1046         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1047         ib.ptr[2] = lower_32_bits(gpu_addr);
1048         ib.ptr[3] = upper_32_bits(gpu_addr);
1049         ib.ptr[4] = 0xDEADBEEF;
1050         ib.length_dw = 5;
1051
1052         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1053         if (r)
1054                 goto err2;
1055
1056         r = dma_fence_wait_timeout(f, false, timeout);
1057         if (r == 0) {
1058                 r = -ETIMEDOUT;
1059                 goto err2;
1060         } else if (r < 0) {
1061                 goto err2;
1062         }
1063
1064         tmp = adev->wb.wb[index];
1065         if (tmp == 0xDEADBEEF)
1066                 r = 0;
1067         else
1068                 r = -EINVAL;
1069
1070 err2:
1071         amdgpu_ib_free(adev, &ib, NULL);
1072         dma_fence_put(f);
1073 err1:
1074         amdgpu_device_wb_free(adev, index);
1075         return r;
1076 }
1077
1078
1079 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1080 {
1081         release_firmware(adev->gfx.pfp_fw);
1082         adev->gfx.pfp_fw = NULL;
1083         release_firmware(adev->gfx.me_fw);
1084         adev->gfx.me_fw = NULL;
1085         release_firmware(adev->gfx.ce_fw);
1086         adev->gfx.ce_fw = NULL;
1087         release_firmware(adev->gfx.rlc_fw);
1088         adev->gfx.rlc_fw = NULL;
1089         release_firmware(adev->gfx.mec_fw);
1090         adev->gfx.mec_fw = NULL;
1091         release_firmware(adev->gfx.mec2_fw);
1092         adev->gfx.mec2_fw = NULL;
1093
1094         kfree(adev->gfx.rlc.register_list_format);
1095 }
1096
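/*
 * Set me_fw_write_wait/mec_fw_write_wait when the CP firmware on this ASIC
 * is new enough to handle register write-then-wait packets, and warn once
 * if the CP firmware is older than the minimum recommended version.
 */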
1097 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1098 {
1099         adev->gfx.me_fw_write_wait = false;
1100         adev->gfx.mec_fw_write_wait = false;
1101
1102         if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1103             ((adev->gfx.mec_fw_version < 0x000001a5) ||
1104             (adev->gfx.mec_feature_version < 46) ||
1105             (adev->gfx.pfp_fw_version < 0x000000b7) ||
1106             (adev->gfx.pfp_feature_version < 46)))
1107                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1108
1109         switch (adev->ip_versions[GC_HWIP][0]) {
1110         case IP_VERSION(9, 0, 1):
1111                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1112                     (adev->gfx.me_feature_version >= 42) &&
1113                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1114                     (adev->gfx.pfp_feature_version >= 42))
1115                         adev->gfx.me_fw_write_wait = true;
1116
1117                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1118                     (adev->gfx.mec_feature_version >= 42))
1119                         adev->gfx.mec_fw_write_wait = true;
1120                 break;
1121         case IP_VERSION(9, 2, 1):
1122                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1123                     (adev->gfx.me_feature_version >= 44) &&
1124                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1125                     (adev->gfx.pfp_feature_version >= 44))
1126                         adev->gfx.me_fw_write_wait = true;
1127
1128                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1129                     (adev->gfx.mec_feature_version >= 44))
1130                         adev->gfx.mec_fw_write_wait = true;
1131                 break;
1132         case IP_VERSION(9, 4, 0):
1133                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1134                     (adev->gfx.me_feature_version >= 44) &&
1135                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1136                     (adev->gfx.pfp_feature_version >= 44))
1137                         adev->gfx.me_fw_write_wait = true;
1138
1139                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1140                     (adev->gfx.mec_feature_version >= 44))
1141                         adev->gfx.mec_fw_write_wait = true;
1142                 break;
1143         case IP_VERSION(9, 1, 0):
1144         case IP_VERSION(9, 2, 2):
1145                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1146                     (adev->gfx.me_feature_version >= 42) &&
1147                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1148                     (adev->gfx.pfp_feature_version >= 42))
1149                         adev->gfx.me_fw_write_wait = true;
1150
1151                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1152                     (adev->gfx.mec_feature_version >= 42))
1153                         adev->gfx.mec_fw_write_wait = true;
1154                 break;
1155         default:
1156                 adev->gfx.me_fw_write_wait = true;
1157                 adev->gfx.mec_fw_write_wait = true;
1158                 break;
1159         }
1160 }
1161
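/*
 * PCI vendor/device/subsystem/revision tuples of boards on which GFXOFF is
 * known to be unstable and must be disabled (see the quirk list below).
 */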
1162 struct amdgpu_gfxoff_quirk {
1163         u16 chip_vendor;
1164         u16 chip_device;
1165         u16 subsys_vendor;
1166         u16 subsys_device;
1167         u8 revision;
1168 };
1169
1170 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1171         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1172         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1173         /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1174         { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1175         /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1176         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1177         /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1178         { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1179         { 0, 0, 0, 0, 0 },
1180 };
1181
1182 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1183 {
1184         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1185
1186         while (p && p->chip_device != 0) {
1187                 if (pdev->vendor == p->chip_vendor &&
1188                     pdev->device == p->chip_device &&
1189                     pdev->subsystem_vendor == p->subsys_vendor &&
1190                     pdev->subsystem_device == p->subsys_device &&
1191                     pdev->revision == p->revision) {
1192                         return true;
1193                 }
1194                 ++p;
1195         }
1196         return false;
1197 }
1198
1199 static bool is_raven_kicker(struct amdgpu_device *adev)
1200 {
1201         if (adev->pm.fw_version >= 0x41e2b)
1202                 return true;
1203         else
1204                 return false;
1205 }
1206
1207 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1208 {
1209         if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1210             (adev->gfx.me_fw_version >= 0x000000a5) &&
1211             (adev->gfx.me_feature_version >= 52))
1212                 return true;
1213         else
1214                 return false;
1215 }
1216
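/*
 * Disable GFXOFF on quirked boards and on Raven parts whose RLC firmware is
 * too old to support it; where GFXOFF stays enabled, also enable the GFX
 * powergating flags that go along with it.
 */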
1217 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1218 {
1219         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1220                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1221
1222         switch (adev->ip_versions[GC_HWIP][0]) {
1223         case IP_VERSION(9, 0, 1):
1224         case IP_VERSION(9, 2, 1):
1225         case IP_VERSION(9, 4, 0):
1226                 break;
1227         case IP_VERSION(9, 2, 2):
1228         case IP_VERSION(9, 1, 0):
1229                 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1230                       (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1231                     ((!is_raven_kicker(adev) &&
1232                       adev->gfx.rlc_fw_version < 531) ||
1233                      (adev->gfx.rlc_feature_version < 1) ||
1234                      !adev->gfx.rlc.is_rlc_v2_1))
1235                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1236
1237                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1238                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1239                                 AMD_PG_SUPPORT_CP |
1240                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1241                 break;
1242         case IP_VERSION(9, 3, 0):
1243                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1244                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1245                                 AMD_PG_SUPPORT_CP |
1246                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1247                 break;
1248         default:
1249                 break;
1250         }
1251 }
1252
1253 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1254                                           const char *chip_name)
1255 {
1256         char fw_name[30];
1257         int err;
1258
1259         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1260         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1261         if (err)
1262                 goto out;
1263         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1264         if (err)
1265                 goto out;
1266         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1267
1268         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1269         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1270         if (err)
1271                 goto out;
1272         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1273         if (err)
1274                 goto out;
1275         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1276
1277         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1278         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1279         if (err)
1280                 goto out;
1281         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1282         if (err)
1283                 goto out;
1284         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1285
1286 out:
1287         if (err) {
1288                 dev_err(adev->dev,
1289                         "gfx9: Failed to init firmware \"%s\"\n",
1290                         fw_name);
1291                 release_firmware(adev->gfx.pfp_fw);
1292                 adev->gfx.pfp_fw = NULL;
1293                 release_firmware(adev->gfx.me_fw);
1294                 adev->gfx.me_fw = NULL;
1295                 release_firmware(adev->gfx.ce_fw);
1296                 adev->gfx.ce_fw = NULL;
1297         }
1298         return err;
1299 }
1300
1301 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1302                                           const char *chip_name)
1303 {
1304         char fw_name[30];
1305         int err;
1306         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1307         uint16_t version_major;
1308         uint16_t version_minor;
1309         uint32_t smu_version;
1310
1311         /*
1312          * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1313          * instead of picasso_rlc.bin.
1314          * Detection:
1315          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1316          *          or revision >= 0xD8 && revision <= 0xDF
1317          * otherwise it is PCO FP5
1318          */
1319         if (!strcmp(chip_name, "picasso") &&
1320                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1321                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1322                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1323         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1324                 (smu_version >= 0x41e2b))
1325                 /*
1326                  * SMC is loaded by SBIOS on APUs, so the SMU version can be read directly.
1327                  */
1328                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1329         else
1330                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1331         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1332         if (err)
1333                 goto out;
1334         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1335         if (err)
1336                 goto out;
1337         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1338
1339         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1340         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1341         err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1342 out:
1343         if (err) {
1344                 dev_err(adev->dev,
1345                         "gfx9: Failed to init firmware \"%s\"\n",
1346                         fw_name);
1347                 release_firmware(adev->gfx.rlc_fw);
1348                 adev->gfx.rlc_fw = NULL;
1349         }
1350         return err;
1351 }
1352
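/* Arcturus, Renoir and Aldebaran have no separate MEC2 firmware image. */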
1353 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1354 {
1355         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1356             adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1357             adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1358                 return false;
1359
1360         return true;
1361 }
1362
1363 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1364                                           const char *chip_name)
1365 {
1366         char fw_name[30];
1367         int err;
1368
1369         if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1370                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name);
1371         else
1372                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1373
1374         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1375         if (err)
1376                 goto out;
1377         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1378         if (err)
1379                 goto out;
1380         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1381         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1382
1383         if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1384                 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1385                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name);
1386                 else
1387                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1388
1389                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1390                 if (!err) {
1391                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1392                         if (err)
1393                                 goto out;
1394                         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1395                         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1396                 } else {
1397                         err = 0;
1398                         adev->gfx.mec2_fw = NULL;
1399                 }
1400         } else {
1401                 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1402                 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1403         }
1404
1405 out:
1406         gfx_v9_0_check_if_need_gfxoff(adev);
1407         gfx_v9_0_check_fw_write_wait(adev);
1408         if (err) {
1409                 dev_err(adev->dev,
1410                         "gfx9: Failed to init firmware \"%s\"\n",
1411                         fw_name);
1412                 release_firmware(adev->gfx.mec_fw);
1413                 adev->gfx.mec_fw = NULL;
1414                 release_firmware(adev->gfx.mec2_fw);
1415                 adev->gfx.mec2_fw = NULL;
1416         }
1417         return err;
1418 }
1419
1420 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1421 {
1422         const char *chip_name;
1423         int r;
1424
1425         DRM_DEBUG("\n");
1426
1427         switch (adev->ip_versions[GC_HWIP][0]) {
1428         case IP_VERSION(9, 0, 1):
1429                 chip_name = "vega10";
1430                 break;
1431         case IP_VERSION(9, 2, 1):
1432                 chip_name = "vega12";
1433                 break;
1434         case IP_VERSION(9, 4, 0):
1435                 chip_name = "vega20";
1436                 break;
1437         case IP_VERSION(9, 2, 2):
1438         case IP_VERSION(9, 1, 0):
1439                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1440                         chip_name = "raven2";
1441                 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1442                         chip_name = "picasso";
1443                 else
1444                         chip_name = "raven";
1445                 break;
1446         case IP_VERSION(9, 4, 1):
1447                 chip_name = "arcturus";
1448                 break;
1449         case IP_VERSION(9, 3, 0):
1450                 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1451                         chip_name = "renoir";
1452                 else
1453                         chip_name = "green_sardine";
1454                 break;
1455         case IP_VERSION(9, 4, 2):
1456                 chip_name = "aldebaran";
1457                 break;
1458         default:
1459                 BUG();
1460         }
1461
1462         /* No CPG in Arcturus */
1463         if (adev->gfx.num_gfx_rings) {
1464                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1465                 if (r)
1466                         return r;
1467         }
1468
1469         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1470         if (r)
1471                 return r;
1472
1473         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1474         if (r)
1475                 return r;
1476
1477         return r;
1478 }
1479
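/*
 * Return the size, in dwords, of the clear state buffer: preamble and
 * context-control packets, one SET_CONTEXT_REG extent per context section,
 * and the trailing end-of-clear-state and clear-state packets.
 */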
1480 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1481 {
1482         u32 count = 0;
1483         const struct cs_section_def *sect = NULL;
1484         const struct cs_extent_def *ext = NULL;
1485
1486         /* begin clear state */
1487         count += 2;
1488         /* context control state */
1489         count += 3;
1490
1491         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1492                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1493                         if (sect->id == SECT_CONTEXT)
1494                                 count += 2 + ext->reg_count;
1495                         else
1496                                 return 0;
1497                 }
1498         }
1499
1500         /* end clear state */
1501         count += 2;
1502         /* clear state */
1503         count += 2;
1504
1505         return count;
1506 }
1507
1508 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1509                                     volatile u32 *buffer)
1510 {
1511         u32 count = 0, i;
1512         const struct cs_section_def *sect = NULL;
1513         const struct cs_extent_def *ext = NULL;
1514
1515         if (adev->gfx.rlc.cs_data == NULL)
1516                 return;
1517         if (buffer == NULL)
1518                 return;
1519
1520         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1521         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1522
1523         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1524         buffer[count++] = cpu_to_le32(0x80000000);
1525         buffer[count++] = cpu_to_le32(0x80000000);
1526
1527         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1528                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1529                         if (sect->id == SECT_CONTEXT) {
1530                                 buffer[count++] =
1531                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1532                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1533                                                 PACKET3_SET_CONTEXT_REG_START);
1534                                 for (i = 0; i < ext->reg_count; i++)
1535                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1536                         } else {
1537                                 return;
1538                         }
1539                 }
1540         }
1541
1542         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1543         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1544
1545         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1546         buffer[count++] = cpu_to_le32(0);
1547 }
1548
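/*
 * Program the per-SE/SH always-on CU masks used by RLC powergating: the
 * first enabled CUs in each shader array stay powered (2 CUs for the PG
 * mask; 4, 8 or 12 CUs for the load-balancer mask depending on the ASIC).
 */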
1549 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1550 {
1551         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1552         uint32_t pg_always_on_cu_num = 2;
1553         uint32_t always_on_cu_num;
1554         uint32_t i, j, k;
1555         uint32_t mask, cu_bitmap, counter;
1556
1557         if (adev->flags & AMD_IS_APU)
1558                 always_on_cu_num = 4;
1559         else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1560                 always_on_cu_num = 8;
1561         else
1562                 always_on_cu_num = 12;
1563
1564         mutex_lock(&adev->grbm_idx_mutex);
1565         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1566                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1567                         mask = 1;
1568                         cu_bitmap = 0;
1569                         counter = 0;
1570                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
1571
1572                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1573                                 if (cu_info->bitmap[i][j] & mask) {
1574                                         if (counter == pg_always_on_cu_num)
1575                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1576                                         if (counter < always_on_cu_num)
1577                                                 cu_bitmap |= mask;
1578                                         else
1579                                                 break;
1580                                         counter++;
1581                                 }
1582                                 mask <<= 1;
1583                         }
1584
1585                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1586                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1587                 }
1588         }
1589         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1590         mutex_unlock(&adev->grbm_idx_mutex);
1591 }
1592
1593 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1594 {
1595         uint32_t data;
1596
1597         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1598         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1599         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1600         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1601         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1602
1603         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1604         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1605
1606         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1607         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1608
1609         mutex_lock(&adev->grbm_idx_mutex);
1610         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1611         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1612         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1613
1614         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1615         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1616         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1617         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1618         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1619
1620         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1621         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1622         data &= 0x0000FFFF;
1623         data |= 0x00C00000;
1624         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1625
1626         /*
1627          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1628          * programmed in gfx_v9_0_init_always_on_cu_mask()
1629          */
1630
1631         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1632          * but is used here for RLC_LB_CNTL configuration */
1633         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1634         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1635         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1636         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1637         mutex_unlock(&adev->grbm_idx_mutex);
1638
1639         gfx_v9_0_init_always_on_cu_mask(adev);
1640 }
1641
1642 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1643 {
1644         uint32_t data;
1645
1646         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1647         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1648         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1649         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1650         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1651
1652         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1653         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1654
1655         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1656         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1657
1658         mutex_lock(&adev->grbm_idx_mutex);
1659         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1660         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1661         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1662
1663         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1664         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1665         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1666         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1667         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1668
1669         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1670         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1671         data &= 0x0000FFFF;
1672         data |= 0x00C00000;
1673         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1674
1675         /*
1676          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1677          * programmed in gfx_v9_0_init_always_on_cu_mask()
1678          */
1679
1680         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1681          * but is used here for RLC_LB_CNTL configuration */
1682         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1683         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1684         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1685         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1686         mutex_unlock(&adev->grbm_idx_mutex);
1687
1688         gfx_v9_0_init_always_on_cu_mask(adev);
1689 }
1690
1691 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1692 {
1693         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1694 }
1695
1696 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1697 {
1698         if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1699                 return 5;
1700         else
1701                 return 4;
1702 }
1703
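/*
 * Record the scratch, GRBM control/index and spare-interrupt register
 * offsets used by the indirect RLCG register access helpers.
 */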
1704 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1705 {
1706         struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1707
1708         reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
1709         reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1710         reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1711         reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1712         reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1713         reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1714         reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1715         reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1716         adev->gfx.rlc.rlcg_reg_access_supported = true;
1717 }
1718
1719 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1720 {
1721         const struct cs_section_def *cs_data;
1722         int r;
1723
1724         adev->gfx.rlc.cs_data = gfx9_cs_data;
1725
1726         cs_data = adev->gfx.rlc.cs_data;
1727
1728         if (cs_data) {
1729                 /* init clear state block */
1730                 r = amdgpu_gfx_rlc_init_csb(adev);
1731                 if (r)
1732                         return r;
1733         }
1734
1735         if (adev->flags & AMD_IS_APU) {
1736                 /* TODO: double check the cp_table_size for RV */
1737                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1738                 r = amdgpu_gfx_rlc_init_cpt(adev);
1739                 if (r)
1740                         return r;
1741         }
1742
1743         switch (adev->ip_versions[GC_HWIP][0]) {
1744         case IP_VERSION(9, 2, 2):
1745         case IP_VERSION(9, 1, 0):
1746                 gfx_v9_0_init_lbpw(adev);
1747                 break;
1748         case IP_VERSION(9, 4, 0):
1749                 gfx_v9_4_init_lbpw(adev);
1750                 break;
1751         default:
1752                 break;
1753         }
1754
1755         /* init spm vmid with 0xf */
1756         if (adev->gfx.rlc.funcs->update_spm_vmid)
1757                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1758
1759         return 0;
1760 }
1761
1762 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1763 {
1764         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1765         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1766 }
1767
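/*
 * Allocate and clear the HPD EOP buffer for the acquired compute queues,
 * then copy the MEC firmware image into a GTT buffer object.
 */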
1768 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1769 {
1770         int r;
1771         u32 *hpd;
1772         const __le32 *fw_data;
1773         unsigned fw_size;
1774         u32 *fw;
1775         size_t mec_hpd_size;
1776
1777         const struct gfx_firmware_header_v1_0 *mec_hdr;
1778
1779         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1780
1781         /* take ownership of the relevant compute queues */
1782         amdgpu_gfx_compute_queue_acquire(adev);
1783         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1784         if (mec_hpd_size) {
1785                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1786                                               AMDGPU_GEM_DOMAIN_VRAM,
1787                                               &adev->gfx.mec.hpd_eop_obj,
1788                                               &adev->gfx.mec.hpd_eop_gpu_addr,
1789                                               (void **)&hpd);
1790                 if (r) {
1791                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1792                         gfx_v9_0_mec_fini(adev);
1793                         return r;
1794                 }
1795
1796                 memset(hpd, 0, mec_hpd_size);
1797
1798                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1799                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1800         }
1801
1802         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1803
1804         fw_data = (const __le32 *)
1805                 (adev->gfx.mec_fw->data +
1806                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1807         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1808
1809         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1810                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1811                                       &adev->gfx.mec.mec_fw_obj,
1812                                       &adev->gfx.mec.mec_fw_gpu_addr,
1813                                       (void **)&fw);
1814         if (r) {
1815                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1816                 gfx_v9_0_mec_fini(adev);
1817                 return r;
1818         }
1819
1820         memcpy(fw, fw_data, fw_size);
1821
1822         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1823         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1824
1825         return 0;
1826 }
1827
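/*
 * Read one wave-state register for the given SIMD/wave through the SQ
 * indirect register interface.
 */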
1828 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1829 {
1830         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1831                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1832                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1833                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1834                 (SQ_IND_INDEX__FORCE_READ_MASK));
1835         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1836 }
1837
1838 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1839                            uint32_t wave, uint32_t thread,
1840                            uint32_t regno, uint32_t num, uint32_t *out)
1841 {
1842         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1843                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1844                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1845                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1846                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1847                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1848                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1849         while (num--)
1850                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1851 }
1852
1853 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1854 {
1855         /* type 1 wave data */
1856         dst[(*no_fields)++] = 1;
1857         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1858         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1859         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1860         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1861         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1862         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1863         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1864         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1865         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1866         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1867         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1868         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1869         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1870         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1871         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1872 }
1873
1874 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1875                                      uint32_t wave, uint32_t start,
1876                                      uint32_t size, uint32_t *dst)
1877 {
1878         wave_read_regs(
1879                 adev, simd, wave, 0,
1880                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1881 }
1882
1883 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1884                                      uint32_t wave, uint32_t thread,
1885                                      uint32_t start, uint32_t size,
1886                                      uint32_t *dst)
1887 {
1888         wave_read_regs(
1889                 adev, simd, wave, thread,
1890                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1891 }
1892
1893 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1894                                   u32 me, u32 pipe, u32 q, u32 vm)
1895 {
1896         soc15_grbm_select(adev, me, pipe, q, vm);
1897 }
1898
1899 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1900         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1901         .select_se_sh = &gfx_v9_0_select_se_sh,
1902         .read_wave_data = &gfx_v9_0_read_wave_data,
1903         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1904         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1905         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1906 };
1907
1908 const struct amdgpu_ras_block_hw_ops  gfx_v9_0_ras_ops = {
1909                 .ras_error_inject = &gfx_v9_0_ras_error_inject,
1910                 .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
1911                 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
1912 };
1913
1914 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
1915         .ras_block = {
1916                 .hw_ops = &gfx_v9_0_ras_ops,
1917         },
1918 };
1919
1920 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1921 {
1922         u32 gb_addr_config;
1923         int err;
1924
1925         switch (adev->ip_versions[GC_HWIP][0]) {
1926         case IP_VERSION(9, 0, 1):
1927                 adev->gfx.config.max_hw_contexts = 8;
1928                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1929                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1930                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1931                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1932                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1933                 break;
1934         case IP_VERSION(9, 2, 1):
1935                 adev->gfx.config.max_hw_contexts = 8;
1936                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1937                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1938                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1939                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1940                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1941                 DRM_INFO("fix gfx.config for vega12\n");
1942                 break;
1943         case IP_VERSION(9, 4, 0):
1944                 adev->gfx.ras = &gfx_v9_0_ras;
1945                 adev->gfx.config.max_hw_contexts = 8;
1946                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1947                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1948                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1949                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1950                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1951                 gb_addr_config &= ~0xf3e777ff;
1952                 gb_addr_config |= 0x22014042;
1953                 /* check vbios table if gpu info is not available */
1954                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1955                 if (err)
1956                         return err;
1957                 break;
1958         case IP_VERSION(9, 2, 2):
1959         case IP_VERSION(9, 1, 0):
1960                 adev->gfx.config.max_hw_contexts = 8;
1961                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1962                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1963                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1964                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1965                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1966                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1967                 else
1968                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1969                 break;
1970         case IP_VERSION(9, 4, 1):
1971                 adev->gfx.ras = &gfx_v9_4_ras;
1972                 adev->gfx.config.max_hw_contexts = 8;
1973                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1974                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1975                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1976                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1977                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1978                 gb_addr_config &= ~0xf3e777ff;
1979                 gb_addr_config |= 0x22014042;
1980                 break;
1981         case IP_VERSION(9, 3, 0):
1982                 adev->gfx.config.max_hw_contexts = 8;
1983                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1984                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1985                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1986                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1987                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1988                 gb_addr_config &= ~0xf3e777ff;
1989                 gb_addr_config |= 0x22010042;
1990                 break;
1991         case IP_VERSION(9, 4, 2):
1992                 adev->gfx.ras = &gfx_v9_4_2_ras;
1993                 adev->gfx.config.max_hw_contexts = 8;
1994                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1995                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1996                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1997                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1998                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1999                 gb_addr_config &= ~0xf3e777ff;
2000                 gb_addr_config |= 0x22014042;
2001                 /* check vbios table if gpu info is not available */
2002                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2003                 if (err)
2004                         return err;
2005                 break;
2006         default:
2007                 BUG();
2008                 break;
2009         }
2010
2011         if (adev->gfx.ras) {
2012                 err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras->ras_block);
2013                 if (err) {
2014                         DRM_ERROR("Failed to register gfx ras block!\n");
2015                         return err;
2016                 }
2017
2018                 strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx");
2019                 adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
2020                 adev->gfx.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
2021                 adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm;
2022
2023                 /* If no special ras_late_init function is defined, use the gfx default ras_late_init */
2024                 if (!adev->gfx.ras->ras_block.ras_late_init)
2025                         adev->gfx.ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
2026
2027                 /* If no special ras_cb function is defined, use the default ras_cb */
2028                 if (!adev->gfx.ras->ras_block.ras_cb)
2029                         adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
2030         }
2031
2032         adev->gfx.config.gb_addr_config = gb_addr_config;
2033
2034         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2035                         REG_GET_FIELD(
2036                                         adev->gfx.config.gb_addr_config,
2037                                         GB_ADDR_CONFIG,
2038                                         NUM_PIPES);
2039
2040         adev->gfx.config.max_tile_pipes =
2041                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2042
2043         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2044                         REG_GET_FIELD(
2045                                         adev->gfx.config.gb_addr_config,
2046                                         GB_ADDR_CONFIG,
2047                                         NUM_BANKS);
2048         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2049                         REG_GET_FIELD(
2050                                         adev->gfx.config.gb_addr_config,
2051                                         GB_ADDR_CONFIG,
2052                                         MAX_COMPRESSED_FRAGS);
2053         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2054                         REG_GET_FIELD(
2055                                         adev->gfx.config.gb_addr_config,
2056                                         GB_ADDR_CONFIG,
2057                                         NUM_RB_PER_SE);
2058         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2059                         REG_GET_FIELD(
2060                                         adev->gfx.config.gb_addr_config,
2061                                         GB_ADDR_CONFIG,
2062                                         NUM_SHADER_ENGINES);
2063         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2064                         REG_GET_FIELD(
2065                                         adev->gfx.config.gb_addr_config,
2066                                         GB_ADDR_CONFIG,
2067                                         PIPE_INTERLEAVE_SIZE));
2068
2069         return 0;
2070 }
2071
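/*
 * Set up one compute ring: map it to a MEC/pipe/queue triple, assign its
 * doorbell and EOP address, pick a hardware priority and hook it up to the
 * EOP interrupt source.
 */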
2072 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2073                                       int mec, int pipe, int queue)
2074 {
2075         unsigned irq_type;
2076         struct amdgpu_ring *ring;
2077         unsigned int hw_prio;
2078
2079         ring = &adev->gfx.compute_ring[ring_id];
2080
2081         /* mec0 is me1 */
2082         ring->me = mec + 1;
2083         ring->pipe = pipe;
2084         ring->queue = queue;
2085
2086         ring->ring_obj = NULL;
2087         ring->use_doorbell = true;
2088         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2089         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2090                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2091         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2092
2093         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2094                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2095                 + ring->pipe;
2096         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2097                         AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2098         /* type-2 packets are deprecated on MEC, use type-3 instead */
2099         return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2100                                 hw_prio, NULL);
2101 }
2102
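/*
 * sw_init: register the CP interrupt sources, load microcode, allocate the
 * RLC, MEC and KIQ objects, and create the gfx, software gfx and compute
 * rings along with their MQDs.
 */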
2103 static int gfx_v9_0_sw_init(void *handle)
2104 {
2105         int i, j, k, r, ring_id;
2106         struct amdgpu_ring *ring;
2107         struct amdgpu_kiq *kiq;
2108         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2109         unsigned int hw_prio;
2110
2111         switch (adev->ip_versions[GC_HWIP][0]) {
2112         case IP_VERSION(9, 0, 1):
2113         case IP_VERSION(9, 2, 1):
2114         case IP_VERSION(9, 4, 0):
2115         case IP_VERSION(9, 2, 2):
2116         case IP_VERSION(9, 1, 0):
2117         case IP_VERSION(9, 4, 1):
2118         case IP_VERSION(9, 3, 0):
2119         case IP_VERSION(9, 4, 2):
2120                 adev->gfx.mec.num_mec = 2;
2121                 break;
2122         default:
2123                 adev->gfx.mec.num_mec = 1;
2124                 break;
2125         }
2126
2127         adev->gfx.mec.num_pipe_per_mec = 4;
2128         adev->gfx.mec.num_queue_per_pipe = 8;
2129
2130         /* EOP Event */
2131         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2132         if (r)
2133                 return r;
2134
2135         /* Privileged reg */
2136         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2137                               &adev->gfx.priv_reg_irq);
2138         if (r)
2139                 return r;
2140
2141         /* Privileged inst */
2142         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2143                               &adev->gfx.priv_inst_irq);
2144         if (r)
2145                 return r;
2146
2147         /* ECC error */
2148         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2149                               &adev->gfx.cp_ecc_error_irq);
2150         if (r)
2151                 return r;
2152
2153         /* FUE error */
2154         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2155                               &adev->gfx.cp_ecc_error_irq);
2156         if (r)
2157                 return r;
2158
2159         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2160
2161         r = gfx_v9_0_init_microcode(adev);
2162         if (r) {
2163                 DRM_ERROR("Failed to load gfx firmware!\n");
2164                 return r;
2165         }
2166
2167         if (adev->gfx.rlc.funcs) {
2168                 if (adev->gfx.rlc.funcs->init) {
2169                         r = adev->gfx.rlc.funcs->init(adev);
2170                         if (r) {
2171                                 dev_err(adev->dev, "Failed to init rlc BOs!\n");
2172                                 return r;
2173                         }
2174                 }
2175         }
2176
2177         r = gfx_v9_0_mec_init(adev);
2178         if (r) {
2179                 DRM_ERROR("Failed to init MEC BOs!\n");
2180                 return r;
2181         }
2182
2183         /* set up the gfx ring */
2184         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2185                 ring = &adev->gfx.gfx_ring[i];
2186                 ring->ring_obj = NULL;
2187                 if (!i)
2188                         sprintf(ring->name, "gfx");
2189                 else
2190                         sprintf(ring->name, "gfx_%d", i);
2191                 ring->use_doorbell = true;
2192                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2193
2194                 /* disable scheduler on the real ring */
2195                 ring->no_scheduler = true;
2196                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2197                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2198                                      AMDGPU_RING_PRIO_DEFAULT, NULL);
2199                 if (r)
2200                         return r;
2201         }
2202
2203         /* set up the software rings */
2204         if (adev->gfx.num_gfx_rings) {
2205                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2206                         ring = &adev->gfx.sw_gfx_ring[i];
2207                         ring->ring_obj = NULL;
2208                         sprintf(ring->name, "%s", amdgpu_sw_ring_name(i));
2209                         ring->use_doorbell = true;
2210                         ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2211                         ring->is_sw_ring = true;
2212                         hw_prio = amdgpu_sw_ring_priority(i);
2213                         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2214                                              AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2215                                              NULL);
2216                         if (r)
2217                                 return r;
2218                         ring->wptr = 0;
2219                 }
2220
2221                 /* init the muxer and add software rings */
2222                 r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2223                                          GFX9_NUM_SW_GFX_RINGS);
2224                 if (r) {
2225                         DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2226                         return r;
2227                 }
2228                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2229                         r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2230                                                         &adev->gfx.sw_gfx_ring[i]);
2231                         if (r) {
2232                                 DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2233                                 return r;
2234                         }
2235                 }
2236         }
2237
2238         /* set up the compute queues - allocate horizontally across pipes */
2239         ring_id = 0;
2240         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2241                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2242                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2243                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2244                                         continue;
2245
2246                                 r = gfx_v9_0_compute_ring_init(adev,
2247                                                                ring_id,
2248                                                                i, k, j);
2249                                 if (r)
2250                                         return r;
2251
2252                                 ring_id++;
2253                         }
2254                 }
2255         }
2256
2257         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2258         if (r) {
2259                 DRM_ERROR("Failed to init KIQ BOs!\n");
2260                 return r;
2261         }
2262
2263         kiq = &adev->gfx.kiq;
2264         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2265         if (r)
2266                 return r;
2267
2268         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2269         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2270         if (r)
2271                 return r;
2272
2273         adev->gfx.ce_ram_size = 0x8000;
2274
2275         r = gfx_v9_0_gpu_early_init(adev);
2276         if (r)
2277                 return r;
2278
2279         return 0;
2280 }
2281
2282
2283 static int gfx_v9_0_sw_fini(void *handle)
2284 {
2285         int i;
2286         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2287
2288         if (adev->gfx.num_gfx_rings) {
2289                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2290                         amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2291                 amdgpu_ring_mux_fini(&adev->gfx.muxer);
2292         }
2293
2294         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2295                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2296         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2297                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2298
2299         amdgpu_gfx_mqd_sw_fini(adev);
2300         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2301         amdgpu_gfx_kiq_fini(adev);
2302
2303         gfx_v9_0_mec_fini(adev);
2304         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2305                                 &adev->gfx.rlc.clear_state_gpu_addr,
2306                                 (void **)&adev->gfx.rlc.cs_ptr);
2307         if (adev->flags & AMD_IS_APU) {
2308                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2309                                 &adev->gfx.rlc.cp_table_gpu_addr,
2310                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2311         }
2312         gfx_v9_0_free_microcode(adev);
2313
2314         return 0;
2315 }
2316
2317
2318 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2319 {
2320         /* TODO */
2321 }
2322
2323 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2324                            u32 instance)
2325 {
2326         u32 data;
2327
2328         if (instance == 0xffffffff)
2329                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2330         else
2331                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2332
2333         if (se_num == 0xffffffff)
2334                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2335         else
2336                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2337
2338         if (sh_num == 0xffffffff)
2339                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2340         else
2341                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2342
2343         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2344 }
2345
2346 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2347 {
2348         u32 data, mask;
2349
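             /*
              * CC_RB_BACKEND_DISABLE and GC_USER_RB_BACKEND_DISABLE share the
              * same BACKEND_DISABLE field layout, so the two values can be OR'ed
              * and handled with a single mask/shift pair.
              */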
2350         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2351         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2352
2353         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2354         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2355
2356         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2357                                          adev->gfx.config.max_sh_per_se);
2358
2359         return (~data) & mask;
2360 }
2361
2362 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2363 {
2364         int i, j;
2365         u32 data;
2366         u32 active_rbs = 0;
2367         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2368                                         adev->gfx.config.max_sh_per_se;
2369
2370         mutex_lock(&adev->grbm_idx_mutex);
2371         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2372                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2373                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
2374                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2375                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2376                                                rb_bitmap_width_per_sh);
2377                 }
2378         }
2379         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2380         mutex_unlock(&adev->grbm_idx_mutex);
2381
2382         adev->gfx.config.backend_enable_mask = active_rbs;
2383         adev->gfx.config.num_rbs = hweight32(active_rbs);
2384 }
2385
2386 #define DEFAULT_SH_MEM_BASES    (0x6000)
2387 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2388 {
2389         int i;
2390         uint32_t sh_mem_config;
2391         uint32_t sh_mem_bases;
2392
2393         /*
2394          * Configure apertures:
2395          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2396          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2397          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2398          */
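             /*
              * SH_MEM_BASES holds the private base in its low 16 bits and the
              * shared base in its high 16 bits (address bits [63:48] each), so
              * writing 0x6000 into both fields selects the apertures above.
              */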
2399         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2400
2401         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2402                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2403                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2404
2405         mutex_lock(&adev->srbm_mutex);
2406         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2407                 soc15_grbm_select(adev, 0, 0, 0, i);
2408                 /* CP and shaders */
2409                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2410                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2411         }
2412         soc15_grbm_select(adev, 0, 0, 0, 0);
2413         mutex_unlock(&adev->srbm_mutex);
2414
2415         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2416          * access. These should be enabled by FW for target VMIDs. */
2417         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2418                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2419                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2420                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2421                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2422         }
2423 }
2424
2425 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2426 {
2427         int vmid;
2428
2429         /*
2430          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2431          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2432          * the driver can enable them for graphics. VMID0 should maintain
2433          * access so that HWS firmware can save/restore entries.
2434          */
2435         for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2436                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2437                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2438                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2439                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2440         }
2441 }
2442
2443 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2444 {
2445         uint32_t tmp;
2446
2447         switch (adev->ip_versions[GC_HWIP][0]) {
2448         case IP_VERSION(9, 4, 1):
2449                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2450                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2451                                         DISABLE_BARRIER_WAITCNT, 1);
2452                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2453                 break;
2454         default:
2455                 break;
2456         }
2457 }
2458
2459 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2460 {
2461         u32 tmp;
2462         int i;
2463
2464         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2465
2466         gfx_v9_0_tiling_mode_table_init(adev);
2467
2468         if (adev->gfx.num_gfx_rings)
2469                 gfx_v9_0_setup_rb(adev);
2470         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2471         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2472
2473         /* XXX SH_MEM regs */
2474         /* where to put LDS, scratch, GPUVM in FSA64 space */
2475         mutex_lock(&adev->srbm_mutex);
2476         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2477                 soc15_grbm_select(adev, 0, 0, 0, i);
2478                 /* CP and shaders */
2479                 if (i == 0) {
2480                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2481                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2482                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2483                                             !!adev->gmc.noretry);
2484                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2485                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2486                 } else {
2487                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2488                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2489                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2490                                             !!adev->gmc.noretry);
2491                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2492                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2493                                 (adev->gmc.private_aperture_start >> 48));
2494                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2495                                 (adev->gmc.shared_aperture_start >> 48));
2496                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2497                 }
2498         }
2499         soc15_grbm_select(adev, 0, 0, 0, 0);
2500
2501         mutex_unlock(&adev->srbm_mutex);
2502
2503         gfx_v9_0_init_compute_vmid(adev);
2504         gfx_v9_0_init_gds_vmid(adev);
2505         gfx_v9_0_init_sq_config(adev);
2506 }
2507
2508 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2509 {
2510         u32 i, j, k;
2511         u32 mask;
2512
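             /*
              * First wait for the per-CU serdes masters to go idle on every
              * SE/SH, then wait for the non-CU masters (SE/GC/TC) once broadcast
              * mode is restored below.
              */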
2513         mutex_lock(&adev->grbm_idx_mutex);
2514         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2515                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2516                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
2517                         for (k = 0; k < adev->usec_timeout; k++) {
2518                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2519                                         break;
2520                                 udelay(1);
2521                         }
2522                         if (k == adev->usec_timeout) {
2523                                 amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2524                                                       0xffffffff, 0xffffffff);
2525                                 mutex_unlock(&adev->grbm_idx_mutex);
2526                                 DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2527                                          i, j);
2528                                 return;
2529                         }
2530                 }
2531         }
2532         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2533         mutex_unlock(&adev->grbm_idx_mutex);
2534
2535         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2536                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2537                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2538                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2539         for (k = 0; k < adev->usec_timeout; k++) {
2540                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2541                         break;
2542                 udelay(1);
2543         }
2544 }
2545
2546 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2547                                                bool enable)
2548 {
2549         u32 tmp;
2550
2551         /* These interrupts should be enabled to drive DS clock */
2552
2553         tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2554
2555         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2556         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2557         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2558         if (adev->gfx.num_gfx_rings)
2559                 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2560
2561         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2562 }
2563
2564 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2565 {
2566         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2567         /* csib */
2568         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2569                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2570         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2571                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2572         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2573                         adev->gfx.rlc.clear_state_size);
2574 }
2575
2576 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2577                                 int indirect_offset,
2578                                 int list_size,
2579                                 int *unique_indirect_regs,
2580                                 int unique_indirect_reg_count,
2581                                 int *indirect_start_offsets,
2582                                 int *indirect_start_offsets_count,
2583                                 int max_start_offsets_count)
2584 {
2585         int idx;
2586
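             /*
              * Walk the indirect portion of the register list: each block starts
              * at indirect_offset and ends at an 0xFFFFFFFF marker, and the third
              * dword of every 3-dword entry names an indirect register (layout
              * inferred from the parsing below).  Record where each block starts
              * and collect the set of unique indirect registers referenced.
              */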
2587         for (; indirect_offset < list_size; indirect_offset++) {
2588                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2589                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2590                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2591
2592                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2593                         indirect_offset += 2;
2594
2595                         /* look for the matching index */
2596                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2597                                 if (unique_indirect_regs[idx] ==
2598                                         register_list_format[indirect_offset] ||
2599                                         !unique_indirect_regs[idx])
2600                                         break;
2601                         }
2602
2603                         BUG_ON(idx >= unique_indirect_reg_count);
2604
2605                         if (!unique_indirect_regs[idx])
2606                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2607
2608                         indirect_offset++;
2609                 }
2610         }
2611 }
2612
2613 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2614 {
2615         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2616         int unique_indirect_reg_count = 0;
2617
2618         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2619         int indirect_start_offsets_count = 0;
2620
2621         int list_size = 0;
2622         int i = 0, j = 0;
2623         u32 tmp = 0;
2624
2625         u32 *register_list_format =
2626                 kmemdup(adev->gfx.rlc.register_list_format,
2627                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2628         if (!register_list_format)
2629                 return -ENOMEM;
2630
2631         /* setup unique_indirect_regs array and indirect_start_offsets array */
2632         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2633         gfx_v9_1_parse_ind_reg_list(register_list_format,
2634                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2635                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2636                                     unique_indirect_regs,
2637                                     unique_indirect_reg_count,
2638                                     indirect_start_offsets,
2639                                     &indirect_start_offsets_count,
2640                                     ARRAY_SIZE(indirect_start_offsets));
2641
2642         /* enable auto inc in case it is disabled */
2643         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2644         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2645         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2646
2647         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2648         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2649                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2650         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2651                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2652                         adev->gfx.rlc.register_restore[i]);
2653
2654         /* load indirect register */
2655         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2656                 adev->gfx.rlc.reg_list_format_start);
2657
2658         /* direct register portion */
2659         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2660                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2661                         register_list_format[i]);
2662
2663         /* indirect register portion */
2664         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2665                 if (register_list_format[i] == 0xFFFFFFFF) {
2666                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2667                         continue;
2668                 }
2669
2670                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2671                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2672
2673                 for (j = 0; j < unique_indirect_reg_count; j++) {
2674                         if (register_list_format[i] == unique_indirect_regs[j]) {
2675                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2676                                 break;
2677                         }
2678                 }
2679
2680                 BUG_ON(j >= unique_indirect_reg_count);
2681
2682                 i++;
2683         }
2684
2685         /* set save/restore list size */
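             /* the list appears to be stored as register/value pairs, so the
              * entry count written to the RLC is half the dword count
              */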
2686         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2687         list_size = list_size >> 1;
2688         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2689                 adev->gfx.rlc.reg_restore_list_size);
2690         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2691
2692         /* write the starting offsets to RLC scratch ram */
2693         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2694                 adev->gfx.rlc.starting_offsets_start);
2695         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2696                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2697                        indirect_start_offsets[i]);
2698
2699         /* load unique indirect regs */
2700         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2701                 if (unique_indirect_regs[i] != 0) {
2702                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2703                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2704                                unique_indirect_regs[i] & 0x3FFFF);
2705
2706                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2707                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2708                                unique_indirect_regs[i] >> 20);
2709                 }
2710         }
2711
2712         kfree(register_list_format);
2713         return 0;
2714 }
2715
2716 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2717 {
2718         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2719 }
2720
2721 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2722                                              bool enable)
2723 {
2724         uint32_t data = 0;
2725         uint32_t default_data = 0;
2726
2727         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2728         if (enable) {
2729                 /* enable GFXIP control over CGPG */
2730                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2731                 if (default_data != data)
2732                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2733
2734                 /* update status */
2735                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2736                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2737                 if (default_data != data)
2738                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2739         } else {
2740                 /* restore GFXIP control over CGPG */
2741                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2742                 if (default_data != data)
2743                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2744         }
2745 }
2746
2747 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2748 {
2749         uint32_t data = 0;
2750
2751         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2752                               AMD_PG_SUPPORT_GFX_SMG |
2753                               AMD_PG_SUPPORT_GFX_DMG)) {
2754                 /* init IDLE_POLL_COUNT = 0x60 */
2755                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2756                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2757                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2758                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2759
2760                 /* init RLC PG Delay */
2761                 data = 0;
2762                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2763                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2764                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2765                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2766                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2767
2768                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2769                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2770                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2771                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2772
2773                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2774                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2775                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2776                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2777
2778                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2779                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2780
2781                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2782                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2783                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2784                 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2785                         pwr_10_0_gfxip_control_over_cgpg(adev, true);
2786         }
2787 }
2788
2789 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2790                                                 bool enable)
2791 {
2792         uint32_t data = 0;
2793         uint32_t default_data = 0;
2794
2795         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2796         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2797                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2798                              enable ? 1 : 0);
2799         if (default_data != data)
2800                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2801 }
2802
2803 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2804                                                 bool enable)
2805 {
2806         uint32_t data = 0;
2807         uint32_t default_data = 0;
2808
2809         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2810         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2811                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2812                              enable ? 1 : 0);
2813         if (default_data != data)
2814                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2815 }
2816
2817 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2818                                         bool enable)
2819 {
2820         uint32_t data = 0;
2821         uint32_t default_data = 0;
2822
2823         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2824         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2825                              CP_PG_DISABLE,
2826                              enable ? 0 : 1);
2827         if (default_data != data)
2828                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2829 }
2830
2831 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2832                                                 bool enable)
2833 {
2834         uint32_t data, default_data;
2835
2836         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2837         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2838                              GFX_POWER_GATING_ENABLE,
2839                              enable ? 1 : 0);
2840         if (default_data != data)
2841                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2842 }
2843
2844 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2845                                                 bool enable)
2846 {
2847         uint32_t data, default_data;
2848
2849         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2850         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2851                              GFX_PIPELINE_PG_ENABLE,
2852                              enable ? 1 : 0);
2853         if (default_data != data)
2854                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2855
2856         if (!enable)
2857                 /* read any GFX register to wake up GFX */
2858                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2859 }
2860
2861 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2862                                                        bool enable)
2863 {
2864         uint32_t data, default_data;
2865
2866         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2867         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2868                              STATIC_PER_CU_PG_ENABLE,
2869                              enable ? 1 : 0);
2870         if (default_data != data)
2871                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2872 }
2873
2874 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2875                                                 bool enable)
2876 {
2877         uint32_t data, default_data;
2878
2879         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2880         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2881                              DYN_PER_CU_PG_ENABLE,
2882                              enable ? 1 : 0);
2883         if (default_data != data)
2884                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2885 }
2886
2887 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2888 {
2889         gfx_v9_0_init_csb(adev);
2890
2891         /*
2892          * The RLC save/restore list is available since RLC v2_1,
2893          * and it is needed by the gfxoff feature.
2894          */
2895         if (adev->gfx.rlc.is_rlc_v2_1) {
2896                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
2897                     (adev->apu_flags & AMD_APU_IS_RAVEN2))
2898                         gfx_v9_1_init_rlc_save_restore_list(adev);
2899                 gfx_v9_0_enable_save_restore_machine(adev);
2900         }
2901
2902         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2903                               AMD_PG_SUPPORT_GFX_SMG |
2904                               AMD_PG_SUPPORT_GFX_DMG |
2905                               AMD_PG_SUPPORT_CP |
2906                               AMD_PG_SUPPORT_GDS |
2907                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2908                 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
2909                              adev->gfx.rlc.cp_table_gpu_addr >> 8);
2910                 gfx_v9_0_init_gfx_power_gating(adev);
2911         }
2912 }
2913
2914 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2915 {
2916         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2917         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2918         gfx_v9_0_wait_for_rlc_serdes(adev);
2919 }
2920
2921 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2922 {
2923         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2924         udelay(50);
2925         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2926         udelay(50);
2927 }
2928
2929 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2930 {
2931 #ifdef AMDGPU_RLC_DEBUG_RETRY
2932         u32 rlc_ucode_ver;
2933 #endif
2934
2935         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2936         udelay(50);
2937
2938         /* APUs (e.g. carrizo) enable the CP interrupt after the CP is initialized */
2939         if (!(adev->flags & AMD_IS_APU)) {
2940                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2941                 udelay(50);
2942         }
2943
2944 #ifdef AMDGPU_RLC_DEBUG_RETRY
2945         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2946         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2947         if (rlc_ucode_ver == 0x108) {
2948                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2949                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2950                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2951                  * default is 0x9C4 to create a 100us interval */
2952                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2953                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2954                  * to disable the page fault retry interrupts, default is
2955                  * 0x100 (256) */
2956                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2957         }
2958 #endif
2959 }
2960
2961 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2962 {
2963         const struct rlc_firmware_header_v2_0 *hdr;
2964         const __le32 *fw_data;
2965         unsigned i, fw_size;
2966
2967         if (!adev->gfx.rlc_fw)
2968                 return -EINVAL;
2969
2970         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2971         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2972
2973         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2974                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2975         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2976
2977         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2978                         RLCG_UCODE_LOADING_START_ADDRESS);
2979         for (i = 0; i < fw_size; i++)
2980                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2981         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2982
2983         return 0;
2984 }
2985
2986 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2987 {
2988         int r;
2989
2990         if (amdgpu_sriov_vf(adev)) {
2991                 gfx_v9_0_init_csb(adev);
2992                 return 0;
2993         }
2994
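             /* bare-metal path: stop the RLC, disable CGCG/CGLS, program the
              * clear-state buffer and power gating, optionally load the RLC
              * ucode, then restart the RLC
              */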
2995         adev->gfx.rlc.funcs->stop(adev);
2996
2997         /* disable CG */
2998         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2999
3000         gfx_v9_0_init_pg(adev);
3001
3002         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3003                 /* legacy rlc firmware loading */
3004                 r = gfx_v9_0_rlc_load_microcode(adev);
3005                 if (r)
3006                         return r;
3007         }
3008
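             /* amdgpu_lbpw: 0 = off, >0 = on, -1 = default; based on the checks
              * below, the default enables LBPW on Raven-class parts and leaves
              * it off on Vega20 (assumption about the module parameter default)
              */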
3009         switch (adev->ip_versions[GC_HWIP][0]) {
3010         case IP_VERSION(9, 2, 2):
3011         case IP_VERSION(9, 1, 0):
3012                 if (amdgpu_lbpw == 0)
3013                         gfx_v9_0_enable_lbpw(adev, false);
3014                 else
3015                         gfx_v9_0_enable_lbpw(adev, true);
3016                 break;
3017         case IP_VERSION(9, 4, 0):
3018                 if (amdgpu_lbpw > 0)
3019                         gfx_v9_0_enable_lbpw(adev, true);
3020                 else
3021                         gfx_v9_0_enable_lbpw(adev, false);
3022                 break;
3023         default:
3024                 break;
3025         }
3026
3027         adev->gfx.rlc.funcs->start(adev);
3028
3029         return 0;
3030 }
3031
3032 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3033 {
3034         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3035
3036         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3037         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3038         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3039         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3040         udelay(50);
3041 }
3042
3043 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3044 {
3045         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3046         const struct gfx_firmware_header_v1_0 *ce_hdr;
3047         const struct gfx_firmware_header_v1_0 *me_hdr;
3048         const __le32 *fw_data;
3049         unsigned i, fw_size;
3050
3051         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3052                 return -EINVAL;
3053
3054         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3055                 adev->gfx.pfp_fw->data;
3056         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3057                 adev->gfx.ce_fw->data;
3058         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3059                 adev->gfx.me_fw->data;
3060
3061         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3062         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3063         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3064
3065         gfx_v9_0_cp_gfx_enable(adev, false);
3066
3067         /* PFP */
3068         fw_data = (const __le32 *)
3069                 (adev->gfx.pfp_fw->data +
3070                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3071         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3072         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3073         for (i = 0; i < fw_size; i++)
3074                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3075         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3076
3077         /* CE */
3078         fw_data = (const __le32 *)
3079                 (adev->gfx.ce_fw->data +
3080                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3081         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3082         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3083         for (i = 0; i < fw_size; i++)
3084                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3085         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3086
3087         /* ME */
3088         fw_data = (const __le32 *)
3089                 (adev->gfx.me_fw->data +
3090                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3091         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3092         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3093         for (i = 0; i < fw_size; i++)
3094                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3095         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3096
3097         return 0;
3098 }
3099
3100 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3101 {
3102         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3103         const struct cs_section_def *sect = NULL;
3104         const struct cs_extent_def *ext = NULL;
3105         int r, i, tmp;
3106
3107         /* init the CP */
3108         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3109         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3110
3111         gfx_v9_0_cp_gfx_enable(adev, true);
3112
3113         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3114         if (r) {
3115                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3116                 return r;
3117         }
3118
3119         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3120         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3121
3122         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3123         amdgpu_ring_write(ring, 0x80000000);
3124         amdgpu_ring_write(ring, 0x80000000);
3125
3126         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3127                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3128                         if (sect->id == SECT_CONTEXT) {
3129                                 amdgpu_ring_write(ring,
3130                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3131                                                ext->reg_count));
3132                                 amdgpu_ring_write(ring,
3133                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3134                                 for (i = 0; i < ext->reg_count; i++)
3135                                         amdgpu_ring_write(ring, ext->extent[i]);
3136                         }
3137                 }
3138         }
3139
3140         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3141         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3142
3143         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3144         amdgpu_ring_write(ring, 0);
3145
3146         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3147         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3148         amdgpu_ring_write(ring, 0x8000);
3149         amdgpu_ring_write(ring, 0x8000);
3150
3151         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3152         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3153                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3154         amdgpu_ring_write(ring, tmp);
3155         amdgpu_ring_write(ring, 0);
3156
3157         amdgpu_ring_commit(ring);
3158
3159         return 0;
3160 }
3161
3162 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3163 {
3164         struct amdgpu_ring *ring;
3165         u32 tmp;
3166         u32 rb_bufsz;
3167         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3168
3169         /* Set the write pointer delay */
3170         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3171
3172         /* set the RB to use vmid 0 */
3173         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3174
3175         /* Set ring buffer size */
3176         ring = &adev->gfx.gfx_ring[0];
3177         rb_bufsz = order_base_2(ring->ring_size / 8);
3178         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3179         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3180 #ifdef __BIG_ENDIAN
3181         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3182 #endif
3183         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3184
3185         /* Initialize the ring buffer's write pointers */
3186         ring->wptr = 0;
3187         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3188         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3189
3190         /* set the wb address whether it's enabled or not */
3191         rptr_addr = ring->rptr_gpu_addr;
3192         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3193         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3194
3195         wptr_gpu_addr = ring->wptr_gpu_addr;
3196         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3197         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3198
3199         mdelay(1);
3200         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3201
3202         rb_addr = ring->gpu_addr >> 8;
3203         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3204         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3205
3206         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3207         if (ring->use_doorbell) {
3208                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3209                                     DOORBELL_OFFSET, ring->doorbell_index);
3210                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3211                                     DOORBELL_EN, 1);
3212         } else {
3213                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3214         }
3215         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3216
3217         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3218                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3219         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3220
3221         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3222                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3223
3224
3225         /* start the ring */
3226         gfx_v9_0_cp_gfx_start(adev);
3227         ring->sched.ready = true;
3228
3229         return 0;
3230 }
3231
3232 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3233 {
3234         if (enable) {
3235                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3236         } else {
3237                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3238                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3239                 adev->gfx.kiq.ring.sched.ready = false;
3240         }
3241         udelay(50);
3242 }
3243
3244 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3245 {
3246         const struct gfx_firmware_header_v1_0 *mec_hdr;
3247         const __le32 *fw_data;
3248         unsigned i;
3249         u32 tmp;
3250
3251         if (!adev->gfx.mec_fw)
3252                 return -EINVAL;
3253
3254         gfx_v9_0_cp_compute_enable(adev, false);
3255
3256         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3257         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3258
3259         fw_data = (const __le32 *)
3260                 (adev->gfx.mec_fw->data +
3261                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3262         tmp = 0;
3263         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3264         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3265         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3266
3267         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3268                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3269         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3270                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3271
3272         /* MEC1 */
3273         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3274                          mec_hdr->jt_offset);
3275         for (i = 0; i < mec_hdr->jt_size; i++)
3276                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3277                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3278
3279         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3280                         adev->gfx.mec_fw_version);
3281         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3282
3283         return 0;
3284 }
3285
3286 /* KIQ functions */
3287 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3288 {
3289         uint32_t tmp;
3290         struct amdgpu_device *adev = ring->adev;
3291
3292         /* tell RLC which queue is the KIQ */
3293         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3294         tmp &= 0xffffff00;
3295         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3296         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
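             /* the 0x80 bit appears to mark the scheduler entry as active; it is
              * set with a second write so the queue selection is programmed
              * first (assumption based on the write sequence)
              */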
3297         tmp |= 0x80;
3298         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3299 }
3300
3301 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3302 {
3303         struct amdgpu_device *adev = ring->adev;
3304
3305         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3306                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3307                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3308                         mqd->cp_hqd_queue_priority =
3309                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3310                 }
3311         }
3312 }
3313
3314 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3315 {
3316         struct amdgpu_device *adev = ring->adev;
3317         struct v9_mqd *mqd = ring->mqd_ptr;
3318         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3319         uint32_t tmp;
3320
3321         mqd->header = 0xC0310800;
3322         mqd->compute_pipelinestat_enable = 0x00000001;
3323         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3324         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3325         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3326         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3327         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3328         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3329         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3330         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3331         mqd->compute_misc_reserved = 0x00000003;
3332
3333         mqd->dynamic_cu_mask_addr_lo =
3334                 lower_32_bits(ring->mqd_gpu_addr
3335                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3336         mqd->dynamic_cu_mask_addr_hi =
3337                 upper_32_bits(ring->mqd_gpu_addr
3338                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3339
3340         eop_base_addr = ring->eop_gpu_addr >> 8;
3341         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3342         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3343
3344         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
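             /* e.g. GFX9_MEC_HPD_SIZE = 4096 bytes = 1024 dwords, so EOP_SIZE is
              * programmed to 9 and the register encodes 2^(9+1) = 1024 dwords
              */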
3345         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3346         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3347                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3348
3349         mqd->cp_hqd_eop_control = tmp;
3350
3351         /* enable doorbell? */
3352         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3353
3354         if (ring->use_doorbell) {
3355                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3356                                     DOORBELL_OFFSET, ring->doorbell_index);
3357                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3358                                     DOORBELL_EN, 1);
3359                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3360                                     DOORBELL_SOURCE, 0);
3361                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3362                                     DOORBELL_HIT, 0);
3363         } else {
3364                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3365                                          DOORBELL_EN, 0);
3366         }
3367
3368         mqd->cp_hqd_pq_doorbell_control = tmp;
3369
3370         /* disable the queue if it's active */
3371         ring->wptr = 0;
3372         mqd->cp_hqd_dequeue_request = 0;
3373         mqd->cp_hqd_pq_rptr = 0;
3374         mqd->cp_hqd_pq_wptr_lo = 0;
3375         mqd->cp_hqd_pq_wptr_hi = 0;
3376
3377         /* set the pointer to the MQD */
3378         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3379         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3380
3381         /* set MQD vmid to 0 */
3382         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3383         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3384         mqd->cp_mqd_control = tmp;
3385
3386         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3387         hqd_gpu_addr = ring->gpu_addr >> 8;
3388         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3389         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3390
3391         /* set up the HQD, this is similar to CP_RB0_CNTL */
3392         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3393         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3394                             (order_base_2(ring->ring_size / 4) - 1));
3395         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3396                         (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3397 #ifdef __BIG_ENDIAN
3398         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3399 #endif
3400         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3401         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3402         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3403         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3404         mqd->cp_hqd_pq_control = tmp;
3405
3406         /* set the wb address whether it's enabled or not */
3407         wb_gpu_addr = ring->rptr_gpu_addr;
3408         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3409         mqd->cp_hqd_pq_rptr_report_addr_hi =
3410                 upper_32_bits(wb_gpu_addr) & 0xffff;
3411
3412         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3413         wb_gpu_addr = ring->wptr_gpu_addr;
3414         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3415         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3416
3417         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3418         ring->wptr = 0;
3419         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3420
3421         /* set the vmid for the queue */
3422         mqd->cp_hqd_vmid = 0;
3423
3424         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3425         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3426         mqd->cp_hqd_persistent_state = tmp;
3427
3428         /* set MIN_IB_AVAIL_SIZE */
3429         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3430         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3431         mqd->cp_hqd_ib_control = tmp;
3432
3433         /* set static priority for a queue/ring */
3434         gfx_v9_0_mqd_set_priority(ring, mqd);
3435         mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3436
3437         /* the map_queues packet doesn't need to activate the queue,
3438          * so only the KIQ needs to set this field.
3439          */
3440         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3441                 mqd->cp_hqd_active = 1;
3442
3443         return 0;
3444 }
3445
3446 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3447 {
3448         struct amdgpu_device *adev = ring->adev;
3449         struct v9_mqd *mqd = ring->mqd_ptr;
3450         int j;
3451
3452         /* disable wptr polling */
3453         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3454
3455         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3456                mqd->cp_hqd_eop_base_addr_lo);
3457         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3458                mqd->cp_hqd_eop_base_addr_hi);
3459
3460         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3461         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3462                mqd->cp_hqd_eop_control);
3463
3464         /* enable doorbell? */
3465         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3466                mqd->cp_hqd_pq_doorbell_control);
3467
3468         /* disable the queue if it's active */
3469         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3470                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3471                 for (j = 0; j < adev->usec_timeout; j++) {
3472                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3473                                 break;
3474                         udelay(1);
3475                 }
3476                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3477                        mqd->cp_hqd_dequeue_request);
3478                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3479                        mqd->cp_hqd_pq_rptr);
3480                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3481                        mqd->cp_hqd_pq_wptr_lo);
3482                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3483                        mqd->cp_hqd_pq_wptr_hi);
3484         }
3485
3486         /* set the pointer to the MQD */
3487         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3488                mqd->cp_mqd_base_addr_lo);
3489         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3490                mqd->cp_mqd_base_addr_hi);
3491
3492         /* set MQD vmid to 0 */
3493         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3494                mqd->cp_mqd_control);
3495
3496         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3497         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3498                mqd->cp_hqd_pq_base_lo);
3499         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3500                mqd->cp_hqd_pq_base_hi);
3501
3502         /* set up the HQD, this is similar to CP_RB0_CNTL */
3503         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3504                mqd->cp_hqd_pq_control);
3505
3506         /* set the wb address whether it's enabled or not */
3507         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3508                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3509         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3510                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3511
3512         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3513         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3514                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3515         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3516                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3517
3518         /* enable the doorbell if requested */
3519         if (ring->use_doorbell) {
3520                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3521                                         (adev->doorbell_index.kiq * 2) << 2);
3522                 /* If GC has entered CGPG, ringing a doorbell beyond the first page
3523                  * doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3524                  * work around this issue. This change has to align with a firmware
3525                  * update.
3526                  */
3527                 if (check_if_enlarge_doorbell_range(adev))
3528                         WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3529                                         (adev->doorbell.size - 4));
3530                 else
3531                         WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3532                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3533         }
3534
3535         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3536                mqd->cp_hqd_pq_doorbell_control);
3537
3538         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3539         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3540                mqd->cp_hqd_pq_wptr_lo);
3541         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3542                mqd->cp_hqd_pq_wptr_hi);
3543
3544         /* set the vmid for the queue */
3545         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3546
3547         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3548                mqd->cp_hqd_persistent_state);
3549
3550         /* activate the queue */
3551         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3552                mqd->cp_hqd_active);
3553
3554         if (ring->use_doorbell)
3555                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3556
3557         return 0;
3558 }
3559
3560 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3561 {
3562         struct amdgpu_device *adev = ring->adev;
3563         int j;
3564
3565         /* disable the queue if it's active */
3566         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3567
3568                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3569
3570                 for (j = 0; j < adev->usec_timeout; j++) {
3571                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3572                                 break;
3573                         udelay(1);
3574                 }
3575
3576                 if (j == adev->usec_timeout) {
3577                         DRM_DEBUG("KIQ dequeue request failed.\n");
3578
3579                         /* Manual disable if dequeue request times out */
3580                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3581                 }
3582
3583                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3584                       0);
3585         }
3586
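        /* quiesce the remaining HQD state: clear the timer, IB control and
         * persistent state, toggle the doorbell control, and zero the read and
         * write pointers so the queue is left fully inactive.
         */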
3587         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3588         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3589         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3590         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3591         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3592         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3593         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3594         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3595
3596         return 0;
3597 }
3598
3599 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3600 {
3601         struct amdgpu_device *adev = ring->adev;
3602         struct v9_mqd *mqd = ring->mqd_ptr;
3603         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3604         struct v9_mqd *tmp_mqd;
3605
3606         gfx_v9_0_kiq_setting(ring);
3607
3608         /* The GPU could be in a bad state during probe: the driver triggers
3609          * a reset after loading the SMU, and in that case the MQD has not
3610          * been initialized yet, so the driver needs to re-init it.
3611          * Check mqd->cp_hqd_pq_control, since this value should not be 0.
3612          */
3613         tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3614         if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3615                 /* for the GPU_RESET case, reset the MQD to a clean state */
3616                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3617                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3618
3619                 /* reset ring buffer */
3620                 ring->wptr = 0;
3621                 amdgpu_ring_clear_ring(ring);
3622
3623                 mutex_lock(&adev->srbm_mutex);
3624                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3625                 gfx_v9_0_kiq_init_register(ring);
3626                 soc15_grbm_select(adev, 0, 0, 0, 0);
3627                 mutex_unlock(&adev->srbm_mutex);
3628         } else {
3629                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3630                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3631                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3632                 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3633                         amdgpu_ring_clear_ring(ring);
3634                 mutex_lock(&adev->srbm_mutex);
3635                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3636                 gfx_v9_0_mqd_init(ring);
3637                 gfx_v9_0_kiq_init_register(ring);
3638                 soc15_grbm_select(adev, 0, 0, 0, 0);
3639                 mutex_unlock(&adev->srbm_mutex);
3640
3641                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3642                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3643         }
3644
3645         return 0;
3646 }
3647
3648 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3649 {
3650         struct amdgpu_device *adev = ring->adev;
3651         struct v9_mqd *mqd = ring->mqd_ptr;
3652         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3653         struct v9_mqd *tmp_mqd;
3654
3655         /* As with the KIQ init above, the driver needs to re-init the MQD if
3656          * mqd->cp_hqd_pq_control has not been initialized before.
3657          */
3658         tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3659
3660         if (!tmp_mqd->cp_hqd_pq_control ||
3661             (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3662                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3663                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3664                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3665                 mutex_lock(&adev->srbm_mutex);
3666                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3667                 gfx_v9_0_mqd_init(ring);
3668                 soc15_grbm_select(adev, 0, 0, 0, 0);
3669                 mutex_unlock(&adev->srbm_mutex);
3670
3671                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3672                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3673         } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3674                 /* reset MQD to a clean status */
3675                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3676                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3677
3678                 /* reset ring buffer */
3679                 ring->wptr = 0;
3680                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3681                 amdgpu_ring_clear_ring(ring);
3682         } else {
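                /* remaining case: resume (adev->in_suspend) with a previously
                 * initialized MQD, so only the stale ring contents need to be
                 * cleared.
                 */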
3683                 amdgpu_ring_clear_ring(ring);
3684         }
3685
3686         return 0;
3687 }
3688
3689 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3690 {
3691         struct amdgpu_ring *ring;
3692         int r;
3693
3694         ring = &adev->gfx.kiq.ring;
3695
3696         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3697         if (unlikely(r != 0))
3698                 return r;
3699
3700         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3701         if (unlikely(r != 0))
3702                 return r;
3703
3704         gfx_v9_0_kiq_init_queue(ring);
3705         amdgpu_bo_kunmap(ring->mqd_obj);
3706         ring->mqd_ptr = NULL;
3707         amdgpu_bo_unreserve(ring->mqd_obj);
3708         ring->sched.ready = true;
3709         return 0;
3710 }
3711
3712 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3713 {
3714         struct amdgpu_ring *ring = NULL;
3715         int r = 0, i;
3716
3717         gfx_v9_0_cp_compute_enable(adev, true);
3718
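        /* initialize the MQD for each compute ring, then map them all through
         * the KIQ via amdgpu_gfx_enable_kcq() below.
         */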
3719         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3720                 ring = &adev->gfx.compute_ring[i];
3721
3722                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3723                 if (unlikely(r != 0))
3724                         goto done;
3725                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3726                 if (!r) {
3727                         r = gfx_v9_0_kcq_init_queue(ring);
3728                         amdgpu_bo_kunmap(ring->mqd_obj);
3729                         ring->mqd_ptr = NULL;
3730                 }
3731                 amdgpu_bo_unreserve(ring->mqd_obj);
3732                 if (r)
3733                         goto done;
3734         }
3735
3736         r = amdgpu_gfx_enable_kcq(adev);
3737 done:
3738         return r;
3739 }
3740
3741 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3742 {
3743         int r, i;
3744         struct amdgpu_ring *ring;
3745
3746         if (!(adev->flags & AMD_IS_APU))
3747                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3748
3749         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3750                 if (adev->gfx.num_gfx_rings) {
3751                         /* legacy firmware loading */
3752                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3753                         if (r)
3754                                 return r;
3755                 }
3756
3757                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3758                 if (r)
3759                         return r;
3760         }
3761
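        /* bring up the KIQ first: the compute queues below are mapped through
         * KIQ packets, so it must be running before they are enabled.
         */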
3762         r = gfx_v9_0_kiq_resume(adev);
3763         if (r)
3764                 return r;
3765
3766         if (adev->gfx.num_gfx_rings) {
3767                 r = gfx_v9_0_cp_gfx_resume(adev);
3768                 if (r)
3769                         return r;
3770         }
3771
3772         r = gfx_v9_0_kcq_resume(adev);
3773         if (r)
3774                 return r;
3775
3776         if (adev->gfx.num_gfx_rings) {
3777                 ring = &adev->gfx.gfx_ring[0];
3778                 r = amdgpu_ring_test_helper(ring);
3779                 if (r)
3780                         return r;
3781         }
3782
3783         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3784                 ring = &adev->gfx.compute_ring[i];
3785                 amdgpu_ring_test_helper(ring);
3786         }
3787
3788         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3789
3790         return 0;
3791 }
3792
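/* Mirror the data-fabric address hashing settings into TCP_ADDR_CONFIG so the
 * GC address mapping matches the DF configuration (GC 9.4.1/9.4.2 only).
 */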
3793 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3794 {
3795         u32 tmp;
3796
3797         if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3798             adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3799                 return;
3800
3801         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3802         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3803                                 adev->df.hash_status.hash_64k);
3804         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3805                                 adev->df.hash_status.hash_2m);
3806         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3807                                 adev->df.hash_status.hash_1g);
3808         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3809 }
3810
3811 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3812 {
3813         if (adev->gfx.num_gfx_rings)
3814                 gfx_v9_0_cp_gfx_enable(adev, enable);
3815         gfx_v9_0_cp_compute_enable(adev, enable);
3816 }
3817
3818 static int gfx_v9_0_hw_init(void *handle)
3819 {
3820         int r;
3821         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3822
3823         if (!amdgpu_sriov_vf(adev))
3824                 gfx_v9_0_init_golden_registers(adev);
3825
3826         gfx_v9_0_constants_init(adev);
3827
3828         gfx_v9_0_init_tcp_config(adev);
3829
3830         r = adev->gfx.rlc.funcs->resume(adev);
3831         if (r)
3832                 return r;
3833
3834         r = gfx_v9_0_cp_resume(adev);
3835         if (r)
3836                 return r;
3837
3838         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
3839                 gfx_v9_4_2_set_power_brake_sequence(adev);
3840
3841         return r;
3842 }
3843
3844 static int gfx_v9_0_hw_fini(void *handle)
3845 {
3846         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3847
3848         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3849         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3850         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3851
3852         /* with a RAS fatal interrupt triggered, DF freeze and the KCQ disable would fail */
3853         if (!amdgpu_ras_intr_triggered())
3854                 /* disable KCQ to avoid the CPC touching memory that is no longer valid */
3855                 amdgpu_gfx_disable_kcq(adev);
3856
3857         if (amdgpu_sriov_vf(adev)) {
3858                 gfx_v9_0_cp_gfx_enable(adev, false);
3859                 /* Polling must be disabled for SRIOV once the hw is finished,
3860                  * otherwise the CPC engine may keep fetching a WB address that
3861                  * is no longer valid after sw teardown, triggering a DMAR read
3862                  * error on the hypervisor side.
3863                  */
3864                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3865                 return 0;
3866         }
3867
3868         /* Use the deinitialize sequence from CAIL when unbinding the device
3869          * from the driver, otherwise the KIQ hangs when binding it back.
3870          */
3871         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3872                 mutex_lock(&adev->srbm_mutex);
3873                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3874                                 adev->gfx.kiq.ring.pipe,
3875                                 adev->gfx.kiq.ring.queue, 0);
3876                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3877                 soc15_grbm_select(adev, 0, 0, 0, 0);
3878                 mutex_unlock(&adev->srbm_mutex);
3879         }
3880
3881         gfx_v9_0_cp_enable(adev, false);
3882
3883         /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
3884         if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
3885             (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
3886                 dev_dbg(adev->dev, "Skipping RLC halt\n");
3887                 return 0;
3888         }
3889
3890         adev->gfx.rlc.funcs->stop(adev);
3891         return 0;
3892 }
3893
3894 static int gfx_v9_0_suspend(void *handle)
3895 {
3896         return gfx_v9_0_hw_fini(handle);
3897 }
3898
3899 static int gfx_v9_0_resume(void *handle)
3900 {
3901         return gfx_v9_0_hw_init(handle);
3902 }
3903
3904 static bool gfx_v9_0_is_idle(void *handle)
3905 {
3906         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3907
3908         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3909                                 GRBM_STATUS, GUI_ACTIVE))
3910                 return false;
3911         else
3912                 return true;
3913 }
3914
3915 static int gfx_v9_0_wait_for_idle(void *handle)
3916 {
3917         unsigned i;
3918         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3919
3920         for (i = 0; i < adev->usec_timeout; i++) {
3921                 if (gfx_v9_0_is_idle(handle))
3922                         return 0;
3923                 udelay(1);
3924         }
3925         return -ETIMEDOUT;
3926 }
3927
3928 static int gfx_v9_0_soft_reset(void *handle)
3929 {
3930         u32 grbm_soft_reset = 0;
3931         u32 tmp;
3932         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3933
3934         /* GRBM_STATUS */
3935         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3936         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3937                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3938                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3939                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3940                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3941                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3942                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3943                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3944                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3945                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3946         }
3947
3948         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3949                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3950                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3951         }
3952
3953         /* GRBM_STATUS2 */
3954         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3955         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3956                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3957                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3958
3959
3960         if (grbm_soft_reset) {
3961                 /* stop the rlc */
3962                 adev->gfx.rlc.funcs->stop(adev);
3963
3964                 if (adev->gfx.num_gfx_rings)
3965                         /* Disable GFX parsing/prefetching */
3966                         gfx_v9_0_cp_gfx_enable(adev, false);
3967
3968                 /* Disable MEC parsing/prefetching */
3969                 gfx_v9_0_cp_compute_enable(adev, false);
3970
3971                 if (grbm_soft_reset) {
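                        /* assert the requested soft-reset bits, hold them for
                         * ~50us, then de-assert and read back to post the write.
                         */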
3972                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3973                         tmp |= grbm_soft_reset;
3974                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3975                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3976                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3977
3978                         udelay(50);
3979
3980                         tmp &= ~grbm_soft_reset;
3981                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3982                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3983                 }
3984
3985                 /* Wait a little for things to settle down */
3986                 udelay(50);
3987         }
3988         return 0;
3989 }
3990
3991 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
3992 {
3993         signed long r, cnt = 0;
3994         unsigned long flags;
3995         uint32_t seq, reg_val_offs = 0;
3996         uint64_t value = 0;
3997         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3998         struct amdgpu_ring *ring = &kiq->ring;
3999
4000         BUG_ON(!ring->funcs->emit_rreg);
4001
4002         spin_lock_irqsave(&kiq->ring_lock, flags);
4003         if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4004                 pr_err("critical bug! too many kiq readers\n");
4005                 goto failed_unlock;
4006         }
4007         amdgpu_ring_alloc(ring, 32);
4008         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4009         amdgpu_ring_write(ring, 9 |     /* src: register*/
4010                                 (5 << 8) |      /* dst: memory */
4011                                 (1 << 16) |     /* count sel */
4012                                 (1 << 20));     /* write confirm */
4013         amdgpu_ring_write(ring, 0);
4014         amdgpu_ring_write(ring, 0);
4015         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4016                                 reg_val_offs * 4));
4017         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4018                                 reg_val_offs * 4));
4019         r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4020         if (r)
4021                 goto failed_undo;
4022
4023         amdgpu_ring_commit(ring);
4024         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4025
4026         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4027
4028         /* Don't keep waiting in the GPU reset case, because that may block
4029          * the gpu_recover() routine forever; e.g. this virt_kiq_rreg is
4030          * triggered from TTM and ttm_bo_lock_delayed_workqueue() will never
4031          * return if we keep waiting in virt_kiq_rreg, which makes
4032          * gpu_recover() hang there.
4033          *
4034          * Also don't keep waiting when called from IRQ context.
4035          */
4036         if (r < 1 && (amdgpu_in_reset(adev)))
4037                 goto failed_kiq_read;
4038
4039         might_sleep();
4040         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4041                 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4042                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4043         }
4044
4045         if (cnt > MAX_KIQ_REG_TRY)
4046                 goto failed_kiq_read;
4047
4048         mb();
4049         value = (uint64_t)adev->wb.wb[reg_val_offs] |
4050                 (uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4051         amdgpu_device_wb_free(adev, reg_val_offs);
4052         return value;
4053
4054 failed_undo:
4055         amdgpu_ring_undo(ring);
4056 failed_unlock:
4057         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4058 failed_kiq_read:
4059         if (reg_val_offs)
4060                 amdgpu_device_wb_free(adev, reg_val_offs);
4061         pr_err("failed to read gpu clock\n");
4062         return ~0;
4063 }
4064
4065 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4066 {
4067         uint64_t clock, clock_lo, clock_hi, hi_check;
4068
4069         switch (adev->ip_versions[GC_HWIP][0]) {
4070         case IP_VERSION(9, 3, 0):
4071                 preempt_disable();
4072                 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4073                 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4074                 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4075                 /* The SMUIO TSC clock frequency is 100MHz, so the 32-bit low
4076                  * word carries over roughly every 42 seconds.
4077                  */
4078                 if (hi_check != clock_hi) {
4079                         clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4080                         clock_hi = hi_check;
4081                 }
4082                 preempt_enable();
4083                 clock = clock_lo | (clock_hi << 32ULL);
4084                 break;
4085         default:
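                /* keep GFXOFF disabled and hold the clock mutex while the RLC
                 * clock counter registers are captured and read back.
                 */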
4086                 amdgpu_gfx_off_ctrl(adev, false);
4087                 mutex_lock(&adev->gfx.gpu_clock_mutex);
4088                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4089                         clock = gfx_v9_0_kiq_read_clock(adev);
4090                 } else {
4091                         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4092                         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4093                                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4094                 }
4095                 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4096                 amdgpu_gfx_off_ctrl(adev, true);
4097                 break;
4098         }
4099         return clock;
4100 }
4101
4102 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4103                                           uint32_t vmid,
4104                                           uint32_t gds_base, uint32_t gds_size,
4105                                           uint32_t gws_base, uint32_t gws_size,
4106                                           uint32_t oa_base, uint32_t oa_size)
4107 {
4108         struct amdgpu_device *adev = ring->adev;
4109
4110         /* GDS Base */
4111         gfx_v9_0_write_data_to_reg(ring, 0, false,
4112                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4113                                    gds_base);
4114
4115         /* GDS Size */
4116         gfx_v9_0_write_data_to_reg(ring, 0, false,
4117                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4118                                    gds_size);
4119
4120         /* GWS */
4121         gfx_v9_0_write_data_to_reg(ring, 0, false,
4122                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4123                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4124
4125         /* OA */
4126         gfx_v9_0_write_data_to_reg(ring, 0, false,
4127                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4128                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4129 }
4130
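/* Hand-assembled GFX9 compute shaders used by gfx_v9_0_do_edc_gpr_workarounds()
 * below to initialize the vector and scalar GPRs to known values.
 */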
4131 static const u32 vgpr_init_compute_shader[] =
4132 {
4133         0xb07c0000, 0xbe8000ff,
4134         0x000000f8, 0xbf110800,
4135         0x7e000280, 0x7e020280,
4136         0x7e040280, 0x7e060280,
4137         0x7e080280, 0x7e0a0280,
4138         0x7e0c0280, 0x7e0e0280,
4139         0x80808800, 0xbe803200,
4140         0xbf84fff5, 0xbf9c0000,
4141         0xd28c0001, 0x0001007f,
4142         0xd28d0001, 0x0002027e,
4143         0x10020288, 0xb8810904,
4144         0xb7814000, 0xd1196a01,
4145         0x00000301, 0xbe800087,
4146         0xbefc00c1, 0xd89c4000,
4147         0x00020201, 0xd89cc080,
4148         0x00040401, 0x320202ff,
4149         0x00000800, 0x80808100,
4150         0xbf84fff8, 0x7e020280,
4151         0xbf810000, 0x00000000,
4152 };
4153
4154 static const u32 sgpr_init_compute_shader[] =
4155 {
4156         0xb07c0000, 0xbe8000ff,
4157         0x0000005f, 0xbee50080,
4158         0xbe812c65, 0xbe822c65,
4159         0xbe832c65, 0xbe842c65,
4160         0xbe852c65, 0xb77c0005,
4161         0x80808500, 0xbf84fff8,
4162         0xbe800080, 0xbf810000,
4163 };
4164
4165 static const u32 vgpr_init_compute_shader_arcturus[] = {
4166         0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4167         0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4168         0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4169         0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4170         0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4171         0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4172         0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4173         0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4174         0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4175         0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4176         0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4177         0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4178         0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4179         0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4180         0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4181         0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4182         0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4183         0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4184         0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4185         0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4186         0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4187         0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4188         0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4189         0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4190         0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4191         0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4192         0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4193         0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4194         0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4195         0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4196         0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4197         0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4198         0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4199         0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4200         0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4201         0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4202         0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4203         0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4204         0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4205         0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4206         0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4207         0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4208         0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4209         0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4210         0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4211         0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4212         0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4213         0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4214         0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4215         0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4216         0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4217         0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4218         0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4219         0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4220         0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4221         0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4222         0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4223         0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4224         0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4225         0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4226         0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4227         0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4228         0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4229         0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4230         0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4231         0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4232         0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4233         0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4234         0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4235         0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4236         0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4237         0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4238         0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4239         0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4240         0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4241         0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4242         0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4243         0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4244         0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4245         0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4246         0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4247         0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4248         0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4249         0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4250         0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4251         0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4252         0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4253         0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4254         0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4255         0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4256         0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4257         0xbf84fff8, 0xbf810000,
4258 };
4259
4260 /* When the register arrays below are changed, please update gpr_reg_size
4261  * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds() so that
4262  * all gfx9 ASICs are covered. */
4263 static const struct soc15_reg_entry vgpr_init_regs[] = {
4264    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4265    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4266    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4267    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4268    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4269    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4270    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4271    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4272    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4273    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4274    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4275    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4276    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4277    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4278 };
4279
4280 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4281    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4282    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4283    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4284    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4285    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4286    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4287    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4288    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4289    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4290    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4291    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4292    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4293    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4294    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4295 };
4296
4297 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4298    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4299    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4300    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4301    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4302    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4303    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4304    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4305    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4306    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4307    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4308    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4309    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4310    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4311    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4312 };
4313
4314 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4315    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4316    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4317    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4318    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4319    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4320    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4321    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4322    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4323    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4324    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4325    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4326    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4327    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4328    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4329 };
4330
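/* EDC/ECC error counter registers read back by the RAS error-count query; the
 * trailing fields describe how many SE and instance copies of each register
 * need to be walked.
 */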
4331 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4332    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4333    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4334    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4335    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4336    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4337    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4338    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4339    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4340    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4341    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4342    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4343    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4344    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4345    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4346    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4347    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4348    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4349    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4350    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4351    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4352    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4353    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4354    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4355    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4356    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4357    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4358    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4359    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4360    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4361    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4362    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4363    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4364    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4365 };
4366
4367 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4368 {
4369         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4370         int i, r;
4371
4372         /* only support when RAS is enabled */
4373         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4374                 return 0;
4375
4376         r = amdgpu_ring_alloc(ring, 7);
4377         if (r) {
4378                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4379                         ring->name, r);
4380                 return r;
4381         }
4382
4383         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4384         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4385
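        /* issue a CP DMA fill that clears the GDS aperture just mapped at
         * VMID0, and wait for the raw transfer to complete.
         */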
4386         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4387         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4388                                 PACKET3_DMA_DATA_DST_SEL(1) |
4389                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4390                                 PACKET3_DMA_DATA_ENGINE(0)));
4391         amdgpu_ring_write(ring, 0);
4392         amdgpu_ring_write(ring, 0);
4393         amdgpu_ring_write(ring, 0);
4394         amdgpu_ring_write(ring, 0);
4395         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4396                                 adev->gds.gds_size);
4397
4398         amdgpu_ring_commit(ring);
4399
4400         for (i = 0; i < adev->usec_timeout; i++) {
4401                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4402                         break;
4403                 udelay(1);
4404         }
4405
4406         if (i >= adev->usec_timeout)
4407                 r = -ETIMEDOUT;
4408
4409         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4410
4411         return r;
4412 }
4413
4414 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4415 {
4416         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4417         struct amdgpu_ib ib;
4418         struct dma_fence *f = NULL;
4419         int r, i;
4420         unsigned total_size, vgpr_offset, sgpr_offset;
4421         u64 gpu_addr;
4422
4423         int compute_dim_x = adev->gfx.config.max_shader_engines *
4424                                                 adev->gfx.config.max_cu_per_sh *
4425                                                 adev->gfx.config.max_sh_per_se;
4426         int sgpr_work_group_size = 5;
4427         int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4428         int vgpr_init_shader_size;
4429         const u32 *vgpr_init_shader_ptr;
4430         const struct soc15_reg_entry *vgpr_init_regs_ptr;
4431
4432         /* only support when RAS is enabled */
4433         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4434                 return 0;
4435
4436         /* bail if the compute ring is not ready */
4437         if (!ring->sched.ready)
4438                 return 0;
4439
4440         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4441                 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4442                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4443                 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4444         } else {
4445                 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4446                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4447                 vgpr_init_regs_ptr = vgpr_init_regs;
4448         }
4449
4450         total_size =
4451                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4452         total_size +=
4453                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4454         total_size +=
4455                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4456         total_size = ALIGN(total_size, 256);
4457         vgpr_offset = total_size;
4458         total_size += ALIGN(vgpr_init_shader_size, 256);
4459         sgpr_offset = total_size;
4460         total_size += sizeof(sgpr_init_compute_shader);
4461
4462         /* allocate an indirect buffer to put the commands in */
4463         memset(&ib, 0, sizeof(ib));
4464         r = amdgpu_ib_get(adev, NULL, total_size,
4465                                         AMDGPU_IB_POOL_DIRECT, &ib);
4466         if (r) {
4467                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4468                 return r;
4469         }
4470
4471         /* load the compute shaders */
4472         for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4473                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4474
4475         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4476                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4477
4478         /* init the ib length to 0 */
4479         ib.length_dw = 0;
4480
4481         /* VGPR */
4482         /* write the register state for the compute dispatch */
4483         for (i = 0; i < gpr_reg_size; i++) {
4484                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4485                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4486                                                                 - PACKET3_SET_SH_REG_START;
4487                 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4488         }
4489         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4490         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4491         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4492         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4493                                                         - PACKET3_SET_SH_REG_START;
4494         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4495         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4496
4497         /* write dispatch packet */
4498         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4499         ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4500         ib.ptr[ib.length_dw++] = 1; /* y */
4501         ib.ptr[ib.length_dw++] = 1; /* z */
4502         ib.ptr[ib.length_dw++] =
4503                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4504
4505         /* write CS partial flush packet */
4506         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4507         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4508
4509         /* SGPR1 */
4510         /* write the register state for the compute dispatch */
4511         for (i = 0; i < gpr_reg_size; i++) {
4512                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4513                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4514                                                                 - PACKET3_SET_SH_REG_START;
4515                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4516         }
4517         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4518         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4519         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4520         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4521                                                         - PACKET3_SET_SH_REG_START;
4522         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4523         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4524
4525         /* write dispatch packet */
4526         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4527         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4528         ib.ptr[ib.length_dw++] = 1; /* y */
4529         ib.ptr[ib.length_dw++] = 1; /* z */
4530         ib.ptr[ib.length_dw++] =
4531                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4532
4533         /* write CS partial flush packet */
4534         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4535         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4536
4537         /* SGPR2 */
4538         /* write the register state for the compute dispatch */
4539         for (i = 0; i < gpr_reg_size; i++) {
4540                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4541                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4542                                                                 - PACKET3_SET_SH_REG_START;
4543                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4544         }
4545         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4546         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4547         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4548         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4549                                                         - PACKET3_SET_SH_REG_START;
4550         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4551         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4552
4553         /* write dispatch packet */
4554         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4555         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4556         ib.ptr[ib.length_dw++] = 1; /* y */
4557         ib.ptr[ib.length_dw++] = 1; /* z */
4558         ib.ptr[ib.length_dw++] =
4559                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4560
4561         /* write CS partial flush packet */
4562         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4563         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4564
4565         /* schedule the IB on the ring */
4566         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4567         if (r) {
4568                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4569                 goto fail;
4570         }
4571
4572         /* wait for the GPU to finish processing the IB */
4573         r = dma_fence_wait(f, false);
4574         if (r) {
4575                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4576                 goto fail;
4577         }
4578
4579 fail:
4580         amdgpu_ib_free(adev, &ib, NULL);
4581         dma_fence_put(f);
4582
4583         return r;
4584 }
4585
4586 static int gfx_v9_0_early_init(void *handle)
4587 {
4588         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4589
4590         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4591
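        /* GC 9.4.1 (Arcturus) and 9.4.2 (Aldebaran) are compute-only parts and
         * expose no graphics rings.
         */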
4592         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4593             adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4594                 adev->gfx.num_gfx_rings = 0;
4595         else
4596                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4597         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4598                                           AMDGPU_MAX_COMPUTE_RINGS);
4599         gfx_v9_0_set_kiq_pm4_funcs(adev);
4600         gfx_v9_0_set_ring_funcs(adev);
4601         gfx_v9_0_set_irq_funcs(adev);
4602         gfx_v9_0_set_gds_init(adev);
4603         gfx_v9_0_set_rlc_funcs(adev);
4604
4605         /* init rlcg reg access ctrl */
4606         gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4607
4608         return 0;
4609 }
4610
4611 static int gfx_v9_0_ecc_late_init(void *handle)
4612 {
4613         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4614         int r;
4615
4616         /*
4617          * Temporary workaround for an issue where the CP firmware fails to
4618          * update the read pointer when CPDMA writes the clearing operation
4619          * to GDS during the suspend/resume sequence on several cards, so
4620          * limit this operation to the cold boot sequence.
4621          */
4622         if ((!adev->in_suspend) &&
4623             (adev->gds.gds_size)) {
4624                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4625                 if (r)
4626                         return r;
4627         }
4628
4629         /* requires IBs so do in late init after IB pool is initialized */
4630         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4631                 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4632         else
4633                 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4634
4635         if (r)
4636                 return r;
4637
4638         if (adev->gfx.ras &&
4639             adev->gfx.ras->enable_watchdog_timer)
4640                 adev->gfx.ras->enable_watchdog_timer(adev);
4641
4642         return 0;
4643 }
4644
4645 static int gfx_v9_0_late_init(void *handle)
4646 {
4647         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4648         int r;
4649
4650         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4651         if (r)
4652                 return r;
4653
4654         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4655         if (r)
4656                 return r;
4657
4658         r = gfx_v9_0_ecc_late_init(handle);
4659         if (r)
4660                 return r;
4661
4662         return 0;
4663 }
4664
4665 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4666 {
4667         uint32_t rlc_setting;
4668
4669         /* check whether the RLC (F32 core) is currently enabled */
4670         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4671         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4672                 return false;
4673
4674         return true;
4675 }
4676
4677 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4678 {
4679         uint32_t data;
4680         unsigned i;
4681
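        /* ask the RLC to enter safe mode, then poll until it acknowledges by
         * clearing the CMD field.
         */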
4682         data = RLC_SAFE_MODE__CMD_MASK;
4683         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4684         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4685
4686         /* wait for RLC_SAFE_MODE */
4687         for (i = 0; i < adev->usec_timeout; i++) {
4688                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4689                         break;
4690                 udelay(1);
4691         }
4692 }
4693
4694 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4695 {
4696         uint32_t data;
4697
4698         data = RLC_SAFE_MODE__CMD_MASK;
4699         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4700 }
4701
4702 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4703                                                 bool enable)
4704 {
4705         amdgpu_gfx_rlc_enter_safe_mode(adev);
4706
4707         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4708                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4709                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4710                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4711         } else {
4712                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4713                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4714                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4715         }
4716
4717         amdgpu_gfx_rlc_exit_safe_mode(adev);
4718 }
4719
4720 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4721                                                 bool enable)
4722 {
4723         /* TODO: double check if we need to perform under safe mode */
4724         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4725
4726         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4727                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4728         else
4729                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4730
4731         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4732                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4733         else
4734                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4735
4736         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4737 }
4738
4739 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4740                                                       bool enable)
4741 {
4742         uint32_t data, def;
4743
4744         amdgpu_gfx_rlc_enter_safe_mode(adev);
4745
4746         /* It is disabled by HW by default */
4747         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4748                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4749                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4750
4751                 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4752                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4753
4754                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4755                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4756                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4757
4758                 /* only for Vega10 & Raven1 */
4759                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4760
4761                 if (def != data)
4762                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4763
4764                 /* MGLS is a global flag to control all MGLS in GFX */
4765                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4766                         /* 2 - RLC memory Light sleep */
4767                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4768                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4769                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4770                                 if (def != data)
4771                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4772                         }
4773                         /* 3 - CP memory Light sleep */
4774                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4775                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4776                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4777                                 if (def != data)
4778                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4779                         }
4780                 }
4781         } else {
4782                 /* 1 - MGCG_OVERRIDE */
4783                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4784
4785                 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4786                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4787
4788                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4789                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4790                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4791                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4792
4793                 if (def != data)
4794                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4795
4796                 /* 2 - disable MGLS in RLC */
4797                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4798                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4799                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4800                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4801                 }
4802
4803                 /* 3 - disable MGLS in CP */
4804                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4805                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4806                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4807                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4808                 }
4809         }
4810
4811         amdgpu_gfx_rlc_exit_safe_mode(adev);
4812 }
4813
4814 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4815                                            bool enable)
4816 {
4817         uint32_t data, def;
4818
4819         if (!adev->gfx.num_gfx_rings)
4820                 return;
4821
4822         amdgpu_gfx_rlc_enter_safe_mode(adev);
4823
4824         /* Enable 3D CGCG/CGLS */
4825         if (enable) {
4826                 /* write cmd to clear cgcg/cgls ov */
4827                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4828                 /* unset CGCG override */
4829                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4830                 /* update CGCG and CGLS override bits */
4831                 if (def != data)
4832                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4833
4834                 /* enable 3D CGCG FSM (0x0000363f) */
4835                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4836
4837                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4838                         data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4839                                 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4840                 else
4841                         data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4842
4843                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4844                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4845                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4846                 if (def != data)
4847                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4848
4849                 /* set IDLE_POLL_COUNT(0x00900100) */
4850                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4851                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4852                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4853                 if (def != data)
4854                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4855         } else {
4856                 /* Disable CGCG/CGLS */
4857                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4858                 /* disable cgcg, cgls should be disabled */
4859                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4860                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4861                 /* disable cgcg and cgls in FSM */
4862                 if (def != data)
4863                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4864         }
4865
4866         amdgpu_gfx_rlc_exit_safe_mode(adev);
4867 }
4868
4869 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4870                                                       bool enable)
4871 {
4872         uint32_t def, data;
4873
4874         amdgpu_gfx_rlc_enter_safe_mode(adev);
4875
4876         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4877                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4878                 /* unset CGCG override */
4879                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4880                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4881                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4882                 else
4883                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4884                 /* update CGCG and CGLS override bits */
4885                 if (def != data)
4886                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4887
4888                 /* enable CGCG FSM (0x0000363F) */
4889                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4890
4891                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
4892                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4893                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4894                 else
4895                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4896                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4897                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4898                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4899                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4900                 if (def != data)
4901                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4902
4903                 /* set IDLE_POLL_COUNT(0x00900100) */
4904                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4905                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4906                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4907                 if (def != data)
4908                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4909         } else {
4910                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4911                 /* reset CGCG/CGLS bits */
4912                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4913                 /* disable cgcg and cgls in FSM */
4914                 if (def != data)
4915                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4916         }
4917
4918         amdgpu_gfx_rlc_exit_safe_mode(adev);
4919 }
4920
4921 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4922                                             bool enable)
4923 {
4924         if (enable) {
4925                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4926                  * ===  MGCG + MGLS ===
4927                  */
4928                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4929                 /* ===  CGCG /CGLS for GFX 3D Only === */
4930                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4931                 /* ===  CGCG + CGLS === */
4932                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4933         } else {
4934                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4935                  * ===  CGCG + CGLS ===
4936                  */
4937                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4938                 /* ===  CGCG /CGLS for GFX 3D Only === */
4939                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4940                 /* ===  MGCG + MGLS === */
4941                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4942         }
4943         return 0;
4944 }
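
/*
 * Ordering note (illustration only): the enable and disable paths above are
 * deliberately mirrored, presumably so coarse-grain gating is only active
 * while medium-grain gating is already programmed:
 *
 *     enable:  MGCG/MGLS -> 3D CGCG/CGLS -> CGCG/CGLS
 *     disable: CGCG/CGLS -> 3D CGCG/CGLS -> MGCG/MGLS
 */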
4945
4946 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
4947 {
4948         u32 reg, data;
4949
4950         amdgpu_gfx_off_ctrl(adev, false);
4951
4952         reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
4953         if (amdgpu_sriov_is_pp_one_vf(adev))
4954                 data = RREG32_NO_KIQ(reg);
4955         else
4956                 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
4957
4958         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4959         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4960
4961         if (amdgpu_sriov_is_pp_one_vf(adev))
4962                 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
4963         else
4964                 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
4965
4966         amdgpu_gfx_off_ctrl(adev, true);
4967 }
4968
4969 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
4970                                         uint32_t offset,
4971                                         struct soc15_reg_rlcg *entries, int arr_size)
4972 {
4973         int i;
4974         uint32_t reg;
4975
4976         if (!entries)
4977                 return false;
4978
4979         for (i = 0; i < arr_size; i++) {
4980                 const struct soc15_reg_rlcg *entry;
4981
4982                 entry = &entries[i];
4983                 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
4984                 if (offset == reg)
4985                         return true;
4986         }
4987
4988         return false;
4989 }
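
/*
 * Worked example (illustration only): each rlcg entry resolves to the same
 * absolute offset that SOC15_REG_OFFSET() produces for register accesses in
 * this file, i.e. roughly
 *
 *     reg = adev->reg_offset[GC_HWIP][0][mmFOO_BASE_IDX] + mmFOO;
 *
 * so a caller-supplied offset matches an entry only when HWIP, instance,
 * segment and register all agree.  (mmFOO is a placeholder name.)
 */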
4990
4991 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
4992 {
4993         return gfx_v9_0_check_rlcg_range(adev, offset,
4994                                         (void *)rlcg_access_gc_9_0,
4995                                         ARRAY_SIZE(rlcg_access_gc_9_0));
4996 }
4997
4998 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4999         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5000         .set_safe_mode = gfx_v9_0_set_safe_mode,
5001         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5002         .init = gfx_v9_0_rlc_init,
5003         .get_csb_size = gfx_v9_0_get_csb_size,
5004         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5005         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5006         .resume = gfx_v9_0_rlc_resume,
5007         .stop = gfx_v9_0_rlc_stop,
5008         .reset = gfx_v9_0_rlc_reset,
5009         .start = gfx_v9_0_rlc_start,
5010         .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5011         .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5012 };
5013
5014 static int gfx_v9_0_set_powergating_state(void *handle,
5015                                           enum amd_powergating_state state)
5016 {
5017         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5018         bool enable = (state == AMD_PG_STATE_GATE);
5019
5020         switch (adev->ip_versions[GC_HWIP][0]) {
5021         case IP_VERSION(9, 2, 2):
5022         case IP_VERSION(9, 1, 0):
5023         case IP_VERSION(9, 3, 0):
5024                 if (!enable)
5025                         amdgpu_gfx_off_ctrl(adev, false);
5026
5027                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5028                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5029                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5030                 } else {
5031                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5032                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5033                 }
5034
5035                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5036                         gfx_v9_0_enable_cp_power_gating(adev, true);
5037                 else
5038                         gfx_v9_0_enable_cp_power_gating(adev, false);
5039
5040                 /* update gfx cgpg state */
5041                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5042
5043                 /* update gfx mgpg state */
5044                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5045
5046                 if (enable)
5047                         amdgpu_gfx_off_ctrl(adev, true);
5048                 break;
5049         case IP_VERSION(9, 2, 1):
5050                 amdgpu_gfx_off_ctrl(adev, enable);
5051                 break;
5052         default:
5053                 break;
5054         }
5055
5056         return 0;
5057 }
5058
5059 static int gfx_v9_0_set_clockgating_state(void *handle,
5060                                           enum amd_clockgating_state state)
5061 {
5062         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5063
5064         if (amdgpu_sriov_vf(adev))
5065                 return 0;
5066
5067         switch (adev->ip_versions[GC_HWIP][0]) {
5068         case IP_VERSION(9, 0, 1):
5069         case IP_VERSION(9, 2, 1):
5070         case IP_VERSION(9, 4, 0):
5071         case IP_VERSION(9, 2, 2):
5072         case IP_VERSION(9, 1, 0):
5073         case IP_VERSION(9, 4, 1):
5074         case IP_VERSION(9, 3, 0):
5075         case IP_VERSION(9, 4, 2):
5076                 gfx_v9_0_update_gfx_clock_gating(adev,
5077                                                  state == AMD_CG_STATE_GATE);
5078                 break;
5079         default:
5080                 break;
5081         }
5082         return 0;
5083 }
5084
5085 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5086 {
5087         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5088         int data;
5089
5090         if (amdgpu_sriov_vf(adev))
5091                 *flags = 0;
5092
5093         /* AMD_CG_SUPPORT_GFX_MGCG */
5094         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5095         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5096                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5097
5098         /* AMD_CG_SUPPORT_GFX_CGCG */
5099         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5100         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5101                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5102
5103         /* AMD_CG_SUPPORT_GFX_CGLS */
5104         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5105                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5106
5107         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5108         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5109         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5110                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5111
5112         /* AMD_CG_SUPPORT_GFX_CP_LS */
5113         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5114         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5115                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5116
5117         if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5118                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5119                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5120                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5121                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5122
5123                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5124                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5125                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5126         }
5127 }
5128
5129 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5130 {
5131         return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5132 }
5133
5134 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5135 {
5136         struct amdgpu_device *adev = ring->adev;
5137         u64 wptr;
5138
5139         /* XXX check if swapping is necessary on BE */
5140         if (ring->use_doorbell) {
5141                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5142         } else {
5143                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5144                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5145         }
5146
5147         return wptr;
5148 }
5149
5150 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5151 {
5152         struct amdgpu_device *adev = ring->adev;
5153
5154         if (ring->use_doorbell) {
5155                 /* XXX check if swapping is necessary on BE */
5156                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5157                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5158         } else {
5159                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5160                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5161         }
5162 }
5163
5164 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5165 {
5166         struct amdgpu_device *adev = ring->adev;
5167         u32 ref_and_mask, reg_mem_engine;
5168         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5169
5170         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5171                 switch (ring->me) {
5172                 case 1:
5173                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5174                         break;
5175                 case 2:
5176                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5177                         break;
5178                 default:
5179                         return;
5180                 }
5181                 reg_mem_engine = 0;
5182         } else {
5183                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5184                 reg_mem_engine = 1; /* pfp */
5185         }
5186
5187         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5188                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5189                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5190                               ref_and_mask, ref_and_mask, 0x20);
5191 }
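
/*
 * Worked example (illustration only): for a compute ring on MEC1 (me == 1),
 * pipe 2, the code above selects
 *
 *     ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << 2;
 *
 * and the emitted WAIT_REG_MEM then writes that bit to the NBIO HDP flush
 * request register and polls the flush done register until the same bit is
 * set (the exact write-then-wait behaviour depends on the operation field
 * programmed by gfx_v9_0_wait_reg_mem()).
 */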
5192
5193 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5194                                         struct amdgpu_job *job,
5195                                         struct amdgpu_ib *ib,
5196                                         uint32_t flags)
5197 {
5198         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5199         u32 header, control = 0;
5200
5201         if (ib->flags & AMDGPU_IB_FLAG_CE)
5202                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5203         else
5204                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5205
5206         control |= ib->length_dw | (vmid << 24);
5207
5208         if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5209                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5210
5211                 if (flags & AMDGPU_IB_PREEMPTED)
5212                         control |= INDIRECT_BUFFER_PRE_RESUME(1);
5213
5214                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5215                         gfx_v9_0_ring_emit_de_meta(ring,
5216                                                    (!amdgpu_sriov_vf(ring->adev) &&
5217                                                    flags & AMDGPU_IB_PREEMPTED) ?
5218                                                    true : false);
5219         }
5220
5221         amdgpu_ring_write(ring, header);
5222         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5223         amdgpu_ring_write(ring,
5224 #ifdef __BIG_ENDIAN
5225                 (2 << 0) |
5226 #endif
5227                 lower_32_bits(ib->gpu_addr));
5228         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5229         amdgpu_ring_write(ring, control);
5230 }
5231
5232 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5233                                           struct amdgpu_job *job,
5234                                           struct amdgpu_ib *ib,
5235                                           uint32_t flags)
5236 {
5237         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5238         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5239
5240         /* Currently there is a high probability of a wave ID mismatch
5241          * between ME and GDS, leading to a HW deadlock, because ME generates
5242          * different wave IDs than the GDS expects. This situation happens
5243          * randomly when at least 5 compute pipes use GDS ordered append.
5244          * The wave IDs generated by ME are also wrong after suspend/resume.
5245          * Those are probably bugs somewhere else in the kernel driver.
5246          *
5247          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5248          * GDS to 0 for this ring (me/pipe).
5249          */
5250         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5251                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5252                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5253                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5254         }
5255
5256         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5257         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5258         amdgpu_ring_write(ring,
5259 #ifdef __BIG_ENDIAN
5260                                 (2 << 0) |
5261 #endif
5262                                 lower_32_bits(ib->gpu_addr));
5263         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5264         amdgpu_ring_write(ring, control);
5265 }
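
/*
 * Example of the resulting packet stream (illustration only): when the IB
 * carries AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (typically requested by the
 * UMD for GDS ordered append), the workaround above prepends three dwords
 * before the INDIRECT_BUFFER packet:
 *
 *     PACKET3(PACKET3_SET_CONFIG_REG, 1)
 *     mmGDS_COMPUTE_MAX_WAVE_ID
 *     ring->adev->gds.gds_compute_max_wave_id
 */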
5266
5267 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5268                                      u64 seq, unsigned flags)
5269 {
5270         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5271         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5272         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5273         bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5274         uint32_t dw2 = 0;
5275
5276         /* RELEASE_MEM - flush caches, send int */
5277         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5278
5279         if (writeback) {
5280                 dw2 = EOP_TC_NC_ACTION_EN;
5281         } else {
5282                 dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5283                                 EOP_TC_MD_ACTION_EN;
5284         }
5285         dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5286                                 EVENT_INDEX(5);
5287         if (exec)
5288                 dw2 |= EOP_EXEC;
5289
5290         amdgpu_ring_write(ring, dw2);
5291         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5292
5293         /*
5294          * the address should be Qword aligned for a 64bit write, or Dword
5295          * aligned if we only send the 32bit data low (data high is discarded)
5296          */
5297         if (write64bit)
5298                 BUG_ON(addr & 0x7);
5299         else
5300                 BUG_ON(addr & 0x3);
5301         amdgpu_ring_write(ring, lower_32_bits(addr));
5302         amdgpu_ring_write(ring, upper_32_bits(addr));
5303         amdgpu_ring_write(ring, lower_32_bits(seq));
5304         amdgpu_ring_write(ring, upper_32_bits(seq));
5305         amdgpu_ring_write(ring, 0);
5306 }
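
/*
 * Alignment example (illustration only): with AMDGPU_FENCE_FLAG_64BIT the
 * fence address must be 8-byte aligned, otherwise 4-byte alignment is
 * enough, e.g.
 *
 *     addr = 0x1008  ->  valid for 32-bit and 64-bit fence writes
 *     addr = 0x1004  ->  valid for 32-bit writes only (BUG_ON for 64-bit)
 */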
5307
5308 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5309 {
5310         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5311         uint32_t seq = ring->fence_drv.sync_seq;
5312         uint64_t addr = ring->fence_drv.gpu_addr;
5313
5314         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5315                               lower_32_bits(addr), upper_32_bits(addr),
5316                               seq, 0xffffffff, 4);
5317 }
5318
5319 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5320                                         unsigned vmid, uint64_t pd_addr)
5321 {
5322         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5323
5324         /* compute doesn't have PFP */
5325         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5326                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5327                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5328                 amdgpu_ring_write(ring, 0x0);
5329         }
5330 }
5331
5332 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5333 {
5334         return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5335 }
5336
5337 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5338 {
5339         u64 wptr;
5340
5341         /* XXX check if swapping is necessary on BE */
5342         if (ring->use_doorbell)
5343                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5344         else
5345                 BUG();
5346         return wptr;
5347 }
5348
5349 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5350 {
5351         struct amdgpu_device *adev = ring->adev;
5352
5353         /* XXX check if swapping is necessary on BE */
5354         if (ring->use_doorbell) {
5355                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5356                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5357         } else {
5358                 BUG(); /* only DOORBELL method supported on gfx9 now */
5359         }
5360 }
5361
5362 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5363                                          u64 seq, unsigned int flags)
5364 {
5365         struct amdgpu_device *adev = ring->adev;
5366
5367         /* we only allocate 32bit for each seq wb address */
5368         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5369
5370         /* write fence seq to the "addr" */
5371         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5372         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5373                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5374         amdgpu_ring_write(ring, lower_32_bits(addr));
5375         amdgpu_ring_write(ring, upper_32_bits(addr));
5376         amdgpu_ring_write(ring, lower_32_bits(seq));
5377
5378         if (flags & AMDGPU_FENCE_FLAG_INT) {
5379                 /* set register to trigger INT */
5380                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5381                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5382                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5383                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5384                 amdgpu_ring_write(ring, 0);
5385                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5386         }
5387 }
5388
5389 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5390 {
5391         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5392         amdgpu_ring_write(ring, 0);
5393 }
5394
5395 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5396 {
5397         struct amdgpu_device *adev = ring->adev;
5398         struct v9_ce_ib_state ce_payload = {0};
5399         uint64_t offset, ce_payload_gpu_addr;
5400         void *ce_payload_cpu_addr;
5401         int cnt;
5402
5403         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5404
5405         if (ring->is_mes_queue) {
5406                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5407                                   gfx[0].gfx_meta_data) +
5408                         offsetof(struct v9_gfx_meta_data, ce_payload);
5409                 ce_payload_gpu_addr =
5410                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5411                 ce_payload_cpu_addr =
5412                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5413         } else {
5414                 offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5415                 ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5416                 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5417         }
5418
5419         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5420         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5421                                  WRITE_DATA_DST_SEL(8) |
5422                                  WR_CONFIRM) |
5423                                  WRITE_DATA_CACHE_POLICY(0));
5424         amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5425         amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5426
5427         if (resume)
5428                 amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5429                                            sizeof(ce_payload) >> 2);
5430         else
5431                 amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5432                                            sizeof(ce_payload) >> 2);
5433 }
5434
5435 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5436 {
5437         int i, r = 0;
5438         struct amdgpu_device *adev = ring->adev;
5439         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5440         struct amdgpu_ring *kiq_ring = &kiq->ring;
5441         unsigned long flags;
5442
5443         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5444                 return -EINVAL;
5445
5446         spin_lock_irqsave(&kiq->ring_lock, flags);
5447
5448         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5449                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5450                 return -ENOMEM;
5451         }
5452
5453         /* assert preemption condition */
5454         amdgpu_ring_set_preempt_cond_exec(ring, false);
5455
5456         ring->trail_seq += 1;
5457         amdgpu_ring_alloc(ring, 13);
5458         gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5459                                  ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5460         /* reset CP_VMID_PREEMPT after the trailing fence */
5461         amdgpu_ring_emit_wreg(ring,
5462                               SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5463                               0x0);
5464
5465         /* assert IB preemption, emit the trailing fence */
5466         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5467                                    ring->trail_fence_gpu_addr,
5468                                    ring->trail_seq);
5469
5470         amdgpu_ring_commit(kiq_ring);
5471         spin_unlock_irqrestore(&kiq->ring_lock, flags);
5472
5473         /* poll the trailing fence */
5474         for (i = 0; i < adev->usec_timeout; i++) {
5475                 if (ring->trail_seq ==
5476                         le32_to_cpu(*ring->trail_fence_cpu_addr))
5477                         break;
5478                 udelay(1);
5479         }
5480
5481         if (i >= adev->usec_timeout) {
5482                 r = -EINVAL;
5483                 DRM_WARN("ring %d timed out waiting for ib preemption\n", ring->idx);
5484         }
5485
5486         amdgpu_ring_commit(ring);
5487
5488         /* deassert preemption condition */
5489         amdgpu_ring_set_preempt_cond_exec(ring, true);
5490         return r;
5491 }
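
/*
 * Flow summary (illustration only) of the preemption sequence above:
 *
 *   1. reserve space on the KIQ ring under kiq->ring_lock;
 *   2. assert the preemption condition on the gfx ring
 *      (amdgpu_ring_set_preempt_cond_exec(ring, false));
 *   3. emit the trailing fence (EXEC | INT) and clear CP_VMID_PREEMPT;
 *   4. ask the KIQ to unmap the queue with PREEMPT_QUEUES_NO_UNMAP,
 *      passing the trailing fence address/sequence;
 *   5. poll *trail_fence_cpu_addr until it reaches trail_seq (or time out);
 *   6. commit the gfx ring and deassert the preemption condition.
 */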
5492
5493 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
5494 {
5495         struct amdgpu_device *adev = ring->adev;
5496         struct v9_de_ib_state de_payload = {0};
5497         uint64_t offset, gds_addr, de_payload_gpu_addr;
5498         void *de_payload_cpu_addr;
5499         int cnt;
5500
5501         if (ring->is_mes_queue) {
5502                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5503                                   gfx[0].gfx_meta_data) +
5504                         offsetof(struct v9_gfx_meta_data, de_payload);
5505                 de_payload_gpu_addr =
5506                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5507                 de_payload_cpu_addr =
5508                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5509
5510                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5511                                   gfx[0].gds_backup) +
5512                         offsetof(struct v9_gfx_meta_data, de_payload);
5513                 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5514         } else {
5515                 offset = offsetof(struct v9_gfx_meta_data, de_payload);
5516                 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5517                 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5518
5519                 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5520                                  AMDGPU_CSA_SIZE - adev->gds.gds_size,
5521                                  PAGE_SIZE);
5522         }
5523
5524         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5525         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5526
5527         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5528         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5529         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5530                                  WRITE_DATA_DST_SEL(8) |
5531                                  WR_CONFIRM) |
5532                                  WRITE_DATA_CACHE_POLICY(0));
5533         amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5534         amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5535
5536         if (resume)
5537                 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5538                                            sizeof(de_payload) >> 2);
5539         else
5540                 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5541                                            sizeof(de_payload) >> 2);
5542 }
5543
5544 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5545                                    bool secure)
5546 {
5547         uint32_t v = secure ? FRAME_TMZ : 0;
5548
5549         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5550         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5551 }
5552
5553 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5554 {
5555         uint32_t dw2 = 0;
5556
5557         gfx_v9_0_ring_emit_ce_meta(ring,
5558                                    (!amdgpu_sriov_vf(ring->adev) &&
5559                                    flags & AMDGPU_IB_PREEMPTED) ? true : false);
5560
5561         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5562         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5563                 /* set load_global_config & load_global_uconfig */
5564                 dw2 |= 0x8001;
5565                 /* set load_cs_sh_regs */
5566                 dw2 |= 0x01000000;
5567                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5568                 dw2 |= 0x10002;
5569
5570                 /* set load_ce_ram if a preamble is presented */
5571                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5572                         dw2 |= 0x10000000;
5573         } else {
5574                 /* still load_ce_ram if this is the first time a preamble is
5575                  * presented, even though no context switch happens.
5576                  */
5577                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5578                         dw2 |= 0x10000000;
5579         }
5580
5581         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5582         amdgpu_ring_write(ring, dw2);
5583         amdgpu_ring_write(ring, 0);
5584 }
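
/*
 * Worked example (illustration only): for an IB with AMDGPU_HAVE_CTX_SWITCH
 * and AMDGPU_PREAMBLE_IB_PRESENT set, dw2 accumulates to
 *
 *     0x80000000 | 0x8001 | 0x01000000 | 0x10002 | 0x10000000 = 0x91018003
 *
 * which is the first payload dword of the CONTEXT_CONTROL packet emitted
 * above.
 */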
5585
5586 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5587 {
5588         unsigned ret;
5589         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5590         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5591         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5592         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5593         ret = ring->wptr & ring->buf_mask;
5594         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5595         return ret;
5596 }
5597
5598 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5599 {
5600         unsigned cur;
5601         BUG_ON(offset > ring->buf_mask);
5602         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5603
5604         cur = (ring->wptr - 1) & ring->buf_mask;
5605         if (likely(cur > offset))
5606                 ring->ring[offset] = cur - offset;
5607         else
5608                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5609 }
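
/*
 * Worked example (illustration only), assuming a 0x1000-dword ring
 * (buf_mask == 0xfff): init_cond_exec() leaves a 0x55aa55aa placeholder at
 * ring offset 'offset'; patch_cond_exec() later overwrites it with the
 * number of dwords the CP should skip when *cond_exe_gpu_addr == 0:
 *
 *     offset = 0x100, cur = 0x140  ->  ring[0x100] = 0x040
 *     offset = 0xffd, cur = 0x003  ->  ring[0xffd] = 0x1000 - 0xffd + 0x003 = 0x006
 */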
5610
5611 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5612                                     uint32_t reg_val_offs)
5613 {
5614         struct amdgpu_device *adev = ring->adev;
5615
5616         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5617         amdgpu_ring_write(ring, 0 |     /* src: register*/
5618                                 (5 << 8) |      /* dst: memory */
5619                                 (1 << 20));     /* write confirm */
5620         amdgpu_ring_write(ring, reg);
5621         amdgpu_ring_write(ring, 0);
5622         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5623                                 reg_val_offs * 4));
5624         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5625                                 reg_val_offs * 4));
5626 }
5627
5628 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5629                                     uint32_t val)
5630 {
5631         uint32_t cmd = 0;
5632
5633         switch (ring->funcs->type) {
5634         case AMDGPU_RING_TYPE_GFX:
5635                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5636                 break;
5637         case AMDGPU_RING_TYPE_KIQ:
5638                 cmd = (1 << 16); /* no inc addr */
5639                 break;
5640         default:
5641                 cmd = WR_CONFIRM;
5642                 break;
5643         }
5644         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5645         amdgpu_ring_write(ring, cmd);
5646         amdgpu_ring_write(ring, reg);
5647         amdgpu_ring_write(ring, 0);
5648         amdgpu_ring_write(ring, val);
5649 }
5650
5651 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5652                                         uint32_t val, uint32_t mask)
5653 {
5654         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5655 }
5656
5657 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5658                                                   uint32_t reg0, uint32_t reg1,
5659                                                   uint32_t ref, uint32_t mask)
5660 {
5661         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5662         struct amdgpu_device *adev = ring->adev;
5663         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5664                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5665
5666         if (fw_version_ok)
5667                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5668                                       ref, mask, 0x20);
5669         else
5670                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5671                                                            ref, mask);
5672 }
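
/*
 * Note (illustration only): the combined write-then-wait packet is only used
 * when the relevant CP firmware advertises support (me_fw_write_wait /
 * mec_fw_write_wait); otherwise the generic helper falls back to roughly
 *
 *     amdgpu_ring_emit_wreg(ring, reg0, ref);
 *     ...then a separate register wait on reg1 & mask...
 */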
5673
5674 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5675 {
5676         struct amdgpu_device *adev = ring->adev;
5677         uint32_t value = 0;
5678
5679         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5680         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5681         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5682         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5683         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5684 }
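
/*
 * Context sketch (illustration only): this hook is reached through the
 * ring's soft-recovery path (amdgpu_ring_soft_recovery()) when a job hangs.
 * The SQ_CMD write above targets only waves tagged with the guilty VMID:
 *
 *     CMD = 0x03, MODE = 0x01      -> appears to be a wave-kill command
 *                                     (per the SQ_CMD register encoding)
 *     CHECK_VMID = 1, VM_ID = vmid -> restrict it to that VMID's waves
 */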
5685
5686 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5687                                                  enum amdgpu_interrupt_state state)
5688 {
5689         switch (state) {
5690         case AMDGPU_IRQ_STATE_DISABLE:
5691         case AMDGPU_IRQ_STATE_ENABLE:
5692                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5693                                TIME_STAMP_INT_ENABLE,
5694                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5695                 break;
5696         default:
5697                 break;
5698         }
5699 }
5700
5701 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5702                                                      int me, int pipe,
5703                                                      enum amdgpu_interrupt_state state)
5704 {
5705         u32 mec_int_cntl, mec_int_cntl_reg;
5706
5707         /*
5708          * amdgpu controls only the first MEC. That's why this function only
5709          * handles the setting of interrupts for this specific MEC. All other
5710          * pipes' interrupts are set by amdkfd.
5711          */
5712
5713         if (me == 1) {
5714                 switch (pipe) {
5715                 case 0:
5716                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5717                         break;
5718                 case 1:
5719                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5720                         break;
5721                 case 2:
5722                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5723                         break;
5724                 case 3:
5725                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5726                         break;
5727                 default:
5728                         DRM_DEBUG("invalid pipe %d\n", pipe);
5729                         return;
5730                 }
5731         } else {
5732                 DRM_DEBUG("invalid me %d\n", me);
5733                 return;
5734         }
5735
5736         switch (state) {
5737         case AMDGPU_IRQ_STATE_DISABLE:
5738                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5739                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5740                                              TIME_STAMP_INT_ENABLE, 0);
5741                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5742                 break;
5743         case AMDGPU_IRQ_STATE_ENABLE:
5744                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5745                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5746                                              TIME_STAMP_INT_ENABLE, 1);
5747                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5748                 break;
5749         default:
5750                 break;
5751         }
5752 }
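
/*
 * Note (illustration only): although mec_int_cntl_reg may point at any of
 * the four MEC1 pipe registers, the field update always uses the
 * CP_ME1_PIPE0_INT_CNTL field definitions, which relies on the PIPE1..3
 * registers sharing the PIPE0 layout, e.g.:
 *
 *     mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
 *     mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
 *                                  TIME_STAMP_INT_ENABLE, 1);
 */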
5753
5754 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5755                                              struct amdgpu_irq_src *source,
5756                                              unsigned type,
5757                                              enum amdgpu_interrupt_state state)
5758 {
5759         switch (state) {
5760         case AMDGPU_IRQ_STATE_DISABLE:
5761         case AMDGPU_IRQ_STATE_ENABLE:
5762                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5763                                PRIV_REG_INT_ENABLE,
5764                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5765                 break;
5766         default:
5767                 break;
5768         }
5769
5770         return 0;
5771 }
5772
5773 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5774                                               struct amdgpu_irq_src *source,
5775                                               unsigned type,
5776                                               enum amdgpu_interrupt_state state)
5777 {
5778         switch (state) {
5779         case AMDGPU_IRQ_STATE_DISABLE:
5780         case AMDGPU_IRQ_STATE_ENABLE:
5781                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5782                                PRIV_INSTR_INT_ENABLE,
5783                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5784                 break;
5785         default:
5786                 break;
5787         }
5788
5789         return 0;
5790 }
5791
5792 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5793         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5794                         CP_ECC_ERROR_INT_ENABLE, 1)
5795
5796 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5797         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5798                         CP_ECC_ERROR_INT_ENABLE, 0)
5799
5800 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5801                                               struct amdgpu_irq_src *source,
5802                                               unsigned type,
5803                                               enum amdgpu_interrupt_state state)
5804 {
5805         switch (state) {
5806         case AMDGPU_IRQ_STATE_DISABLE:
5807                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5808                                 CP_ECC_ERROR_INT_ENABLE, 0);
5809                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5810                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5811                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5812                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5813                 break;
5814
5815         case AMDGPU_IRQ_STATE_ENABLE:
5816                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5817                                 CP_ECC_ERROR_INT_ENABLE, 1);
5818                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5819                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5820                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5821                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5822                 break;
5823         default:
5824                 break;
5825         }
5826
5827         return 0;
5828 }
5829
5830
5831 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5832                                             struct amdgpu_irq_src *src,
5833                                             unsigned type,
5834                                             enum amdgpu_interrupt_state state)
5835 {
5836         switch (type) {
5837         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5838                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5839                 break;
5840         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5841                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5842                 break;
5843         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5844                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5845                 break;
5846         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5847                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5848                 break;
5849         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5850                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5851                 break;
5852         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5853                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5854                 break;
5855         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5856                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5857                 break;
5858         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5859                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5860                 break;
5861         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5862                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5863                 break;
5864         default:
5865                 break;
5866         }
5867         return 0;
5868 }
5869
5870 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5871                             struct amdgpu_irq_src *source,
5872                             struct amdgpu_iv_entry *entry)
5873 {
5874         int i;
5875         u8 me_id, pipe_id, queue_id;
5876         struct amdgpu_ring *ring;
5877
5878         DRM_DEBUG("IH: CP EOP\n");
5879         me_id = (entry->ring_id & 0x0c) >> 2;
5880         pipe_id = (entry->ring_id & 0x03) >> 0;
5881         queue_id = (entry->ring_id & 0x70) >> 4;
5882
5883         switch (me_id) {
5884         case 0:
5885                 if (adev->gfx.num_gfx_rings &&
5886                     !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
5887                         /* Fence signals are handled on the software rings */
5888                         for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
5889                                 amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
5890                 }
5891                 break;
5892         case 1:
5893         case 2:
5894                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5895                         ring = &adev->gfx.compute_ring[i];
5896                         /* Per-queue interrupt is supported for MEC starting from VI.
5897                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5898                          */
5899                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5900                                 amdgpu_fence_process(ring);
5901                 }
5902                 break;
5903         }
5904         return 0;
5905 }
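
/*
 * Worked example (illustration only) of the ring_id decode above:
 *
 *     entry->ring_id = 0x26 (0b0100110)
 *         me_id    = (0x26 & 0x0c) >> 2 = 1   (MEC1)
 *         pipe_id  =  0x26 & 0x03       = 2
 *         queue_id = (0x26 & 0x70) >> 4 = 2
 *
 * so the fence is processed on the compute ring with me/pipe/queue 1/2/2.
 */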
5906
5907 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5908                            struct amdgpu_iv_entry *entry)
5909 {
5910         u8 me_id, pipe_id, queue_id;
5911         struct amdgpu_ring *ring;
5912         int i;
5913
5914         me_id = (entry->ring_id & 0x0c) >> 2;
5915         pipe_id = (entry->ring_id & 0x03) >> 0;
5916         queue_id = (entry->ring_id & 0x70) >> 4;
5917
5918         switch (me_id) {
5919         case 0:
5920                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5921                 break;
5922         case 1:
5923         case 2:
5924                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5925                         ring = &adev->gfx.compute_ring[i];
5926                         if (ring->me == me_id && ring->pipe == pipe_id &&
5927                             ring->queue == queue_id)
5928                                 drm_sched_fault(&ring->sched);
5929                 }
5930                 break;
5931         }
5932 }
5933
5934 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5935                                  struct amdgpu_irq_src *source,
5936                                  struct amdgpu_iv_entry *entry)
5937 {
5938         DRM_ERROR("Illegal register access in command stream\n");
5939         gfx_v9_0_fault(adev, entry);
5940         return 0;
5941 }
5942
5943 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5944                                   struct amdgpu_irq_src *source,
5945                                   struct amdgpu_iv_entry *entry)
5946 {
5947         DRM_ERROR("Illegal instruction in command stream\n");
5948         gfx_v9_0_fault(adev, entry);
5949         return 0;
5950 }
5951
5952
5953 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5954         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5955           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5956           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5957         },
5958         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5959           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5960           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5961         },
5962         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5963           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5964           0, 0
5965         },
5966         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5967           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5968           0, 0
5969         },
5970         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5971           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5972           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5973         },
5974         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5975           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5976           0, 0
5977         },
5978         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5979           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5980           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5981         },
5982         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5983           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5984           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5985         },
5986         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5987           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5988           0, 0
5989         },
5990         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5991           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5992           0, 0
5993         },
5994         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5995           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5996           0, 0
5997         },
5998         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5999           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6000           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6001         },
6002         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6003           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6004           0, 0
6005         },
6006         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6007           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6008           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6009         },
6010         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6011           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6012           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6013           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6014         },
6015         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6016           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6017           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6018           0, 0
6019         },
6020         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6021           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6022           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6023           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6024         },
6025         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6026           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6027           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6028           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6029         },
6030         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6031           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6032           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6033           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6034         },
6035         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6036           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6037           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6038           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6039         },
6040         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6041           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6042           0, 0
6043         },
6044         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6045           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6046           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6047         },
6048         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6049           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6050           0, 0
6051         },
6052         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6053           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6054           0, 0
6055         },
6056         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6057           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6058           0, 0
6059         },
6060         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6061           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6062           0, 0
6063         },
6064         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6065           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6066           0, 0
6067         },
6068         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6069           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6070           0, 0
6071         },
6072         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6073           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6074           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6075         },
6076         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6077           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6078           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6079         },
6080         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6081           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6082           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6083         },
6084         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6085           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6086           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6087         },
6088         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6089           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6090           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6091         },
6092         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6093           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6094           0, 0
6095         },
6096         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6097           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6098           0, 0
6099         },
6100         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6101           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6102           0, 0
6103         },
6104         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6105           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6106           0, 0
6107         },
6108         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6109           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6110           0, 0
6111         },
6112         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6113           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6114           0, 0
6115         },
6116         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6117           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6118           0, 0
6119         },
6120         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6121           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6122           0, 0
6123         },
6124         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6125           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6126           0, 0
6127         },
6128         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6129           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6130           0, 0
6131         },
6132         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6133           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6134           0, 0
6135         },
6136         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6137           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6138           0, 0
6139         },
6140         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6141           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6142           0, 0
6143         },
6144         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6145           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6146           0, 0
6147         },
6148         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6149           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6150           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6151         },
6152         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6153           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6154           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6155         },
6156         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6157           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6158           0, 0
6159         },
6160         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6161           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6162           0, 0
6163         },
6164         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6165           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6166           0, 0
6167         },
6168         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6169           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6170           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6171         },
6172         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6173           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6174           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6175         },
6176         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6177           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6178           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6179         },
6180         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6181           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6182           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6183         },
6184         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6185           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6186           0, 0
6187         },
6188         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6189           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6190           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6191         },
6192         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6193           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6194           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6195         },
6196         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6197           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6198           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6199         },
6200         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6201           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6202           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6203         },
6204         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6205           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6206           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6207         },
6208         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6209           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6210           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6211         },
6212         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6213           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6214           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6215         },
6216         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6217           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6218           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6219         },
6220         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6221           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6222           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6223         },
6224         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6225           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6226           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6227         },
6228         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6229           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6230           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6231         },
6232         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6233           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6234           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6235         },
6236         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6237           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6238           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6239         },
6240         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6241           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6242           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6243         },
6244         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6245           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6246           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6247         },
6248         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6249           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6250           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6251         },
6252         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6253           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6254           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6255         },
6256         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6257           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6258           0, 0
6259         },
6260         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6261           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6262           0, 0
6263         },
6264         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6265           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6266           0, 0
6267         },
6268         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6269           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6270           0, 0
6271         },
6272         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6273           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6274           0, 0
6275         },
6276         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6277           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6278           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6279         },
6280         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6281           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6282           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6283         },
6284         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6285           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6286           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6287         },
6288         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6289           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6290           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6291         },
6292         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6293           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6294           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6295         },
6296         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6297           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6298           0, 0
6299         },
6300         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6301           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6302           0, 0
6303         },
6304         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6305           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6306           0, 0
6307         },
6308         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6309           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6310           0, 0
6311         },
6312         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6313           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6314           0, 0
6315         },
6316         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6317           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6318           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6319         },
6320         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6321           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6322           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6323         },
6324         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6325           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6326           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6327         },
6328         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6329           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6330           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6331         },
6332         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6333           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6334           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6335         },
6336         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6337           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6338           0, 0
6339         },
6340         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6341           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6342           0, 0
6343         },
6344         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6345           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6346           0, 0
6347         },
6348         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6349           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6350           0, 0
6351         },
6352         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6353           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6354           0, 0
6355         },
6356         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6357           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6358           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6359         },
6360         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6361           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6362           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6363         },
6364         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6365           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6366           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6367         },
6368         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6369           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6370           0, 0
6371         },
6372         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6373           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6374           0, 0
6375         },
6376         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6377           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6378           0, 0
6379         },
6380         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6381           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6382           0, 0
6383         },
6384         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6385           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6386           0, 0
6387         },
6388         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6389           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6390           0, 0
6391         }
6392 };
6393
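/* Validate the requested GFX sub-block and error type against the
 * ras_gfx_subblocks table, then ask the PSP RAS TA to trigger the error.
 */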
6394 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6395                                      void *inject_if)
6396 {
6397         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6398         int ret;
6399         struct ta_ras_trigger_error_input block_info = { 0 };
6400
6401         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6402                 return -EINVAL;
6403
6404         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6405                 return -EINVAL;
6406
6407         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6408                 return -EPERM;
6409
6410         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6411               info->head.type)) {
6412                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6413                         ras_gfx_subblocks[info->head.sub_block_index].name,
6414                         info->head.type);
6415                 return -EPERM;
6416         }
6417
6418         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6419               info->head.type)) {
6420                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6421                         ras_gfx_subblocks[info->head.sub_block_index].name,
6422                         info->head.type);
6423                 return -EPERM;
6424         }
6425
6426         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6427         block_info.sub_block_index =
6428                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6429         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6430         block_info.address = info->address;
6431         block_info.value = info->value;
6432
6433         mutex_lock(&adev->grbm_idx_mutex);
6434         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6435         mutex_unlock(&adev->grbm_idx_mutex);
6436
6437         return ret;
6438 }
6439
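/* Instance names for the VML2, VML2 walker and ATC L2 EDC counter banks;
 * the array index is also the value written to the corresponding
 * *_ECC_INDEX / *_EDC_INDEX selector register.
 */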
6440 static const char *vml2_mems[] = {
6441         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6442         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6443         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6444         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6445         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6446         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6447         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6448         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6449         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6450         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6451         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6452         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6453         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6454         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6455         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6456         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6457 };
6458
6459 static const char *vml2_walker_mems[] = {
6460         "UTC_VML2_CACHE_PDE0_MEM0",
6461         "UTC_VML2_CACHE_PDE0_MEM1",
6462         "UTC_VML2_CACHE_PDE1_MEM0",
6463         "UTC_VML2_CACHE_PDE1_MEM1",
6464         "UTC_VML2_CACHE_PDE2_MEM0",
6465         "UTC_VML2_CACHE_PDE2_MEM1",
6466         "UTC_VML2_RDIF_LOG_FIFO",
6467 };
6468
6469 static const char *atc_l2_cache_2m_mems[] = {
6470         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6471         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6472         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6473         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6474 };
6475
6476 static const char *atc_l2_cache_4k_mems[] = {
6477         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6478         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6479         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6480         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6481         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6482         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6483         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6484         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6485         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6486         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6487         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6488         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6489         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6490         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6491         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6492         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6493         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6494         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6495         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6496         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6497         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6498         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6499         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6500         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6501         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6502         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6503         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6504         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6505         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6506         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6507         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6508         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6509 };
6510
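/* Walk the UTC VML2 and ATC L2 EDC index/count register pairs, adding SEC
 * errors to ce_count and DED errors to ue_count, then park the index
 * selectors back at 255.
 */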
6511 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6512                                          struct ras_err_data *err_data)
6513 {
6514         uint32_t i, data;
6515         uint32_t sec_count, ded_count;
6516
6517         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6518         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6519         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6520         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6521         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6522         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6523         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6524         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6525
6526         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6527                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6528                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6529
6530                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6531                 if (sec_count) {
6532                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6533                                 "SEC %d\n", i, vml2_mems[i], sec_count);
6534                         err_data->ce_count += sec_count;
6535                 }
6536
6537                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6538                 if (ded_count) {
6539                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6540                                 "DED %d\n", i, vml2_mems[i], ded_count);
6541                         err_data->ue_count += ded_count;
6542                 }
6543         }
6544
6545         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6546                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6547                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6548
6549                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6550                                                 SEC_COUNT);
6551                 if (sec_count) {
6552                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6553                                 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6554                         err_data->ce_count += sec_count;
6555                 }
6556
6557                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6558                                                 DED_COUNT);
6559                 if (ded_count) {
6560                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6561                                 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6562                         err_data->ue_count += ded_count;
6563                 }
6564         }
6565
6566         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6567                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6568                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6569
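                /* SEC count is encoded in bits [14:13] of the EDC count register */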
6570                 sec_count = (data & 0x00006000L) >> 0xd;
6571                 if (sec_count) {
6572                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6573                                 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6574                                 sec_count);
6575                         err_data->ce_count += sec_count;
6576                 }
6577         }
6578
6579         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6580                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6581                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6582
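                /* SEC count in bits [14:13], DED count in bits [16:15] */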
6583                 sec_count = (data & 0x00006000L) >> 0xd;
6584                 if (sec_count) {
6585                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6586                                 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6587                                 sec_count);
6588                         err_data->ce_count += sec_count;
6589                 }
6590
6591                 ded_count = (data & 0x00018000L) >> 0xf;
6592                 if (ded_count) {
6593                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6594                                 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6595                                 ded_count);
6596                         err_data->ue_count += ded_count;
6597                 }
6598         }
6599
6600         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6601         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6602         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6603         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6604
6605         return 0;
6606 }
6607
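/* Match a raw EDC counter value against gfx_v9_0_ras_fields and add any
 * per-field SEC/DED hits to the running totals.
 */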
6608 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6609         const struct soc15_reg_entry *reg,
6610         uint32_t se_id, uint32_t inst_id, uint32_t value,
6611         uint32_t *sec_count, uint32_t *ded_count)
6612 {
6613         uint32_t i;
6614         uint32_t sec_cnt, ded_cnt;
6615
6616         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6617                 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6618                         gfx_v9_0_ras_fields[i].seg != reg->seg ||
6619                         gfx_v9_0_ras_fields[i].inst != reg->inst)
6620                         continue;
6621
6622                 sec_cnt = (value &
6623                                 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6624                                 gfx_v9_0_ras_fields[i].sec_count_shift;
6625                 if (sec_cnt) {
6626                         dev_info(adev->dev, "GFX SubBlock %s, "
6627                                 "Instance[%d][%d], SEC %d\n",
6628                                 gfx_v9_0_ras_fields[i].name,
6629                                 se_id, inst_id,
6630                                 sec_cnt);
6631                         *sec_count += sec_cnt;
6632                 }
6633
6634                 ded_cnt = (value &
6635                                 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6636                                 gfx_v9_0_ras_fields[i].ded_count_shift;
6637                 if (ded_cnt) {
6638                         dev_info(adev->dev, "GFX SubBlock %s, "
6639                                 "Instance[%d][%d], DED %d\n",
6640                                 gfx_v9_0_ras_fields[i].name,
6641                                 se_id, inst_id,
6642                                 ded_cnt);
6643                         *ded_count += ded_cnt;
6644                 }
6645         }
6646
6647         return 0;
6648 }
6649
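/* EDC counters are cleared on read, so reading every counter back across
 * all SE/instance combinations (and the UTC/ATC L2 banks) resets them.
 */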
6650 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6651 {
6652         int i, j, k;
6653
6654         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6655                 return;
6656
6657         /* read back registers to clear the counters */
6658         mutex_lock(&adev->grbm_idx_mutex);
6659         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6660                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6661                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6662                                 amdgpu_gfx_select_se_sh(adev, j, 0x0, k);
6663                                 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6664                         }
6665                 }
6666         }
6667         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6668         mutex_unlock(&adev->grbm_idx_mutex);
6669
6670         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6671         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6672         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6673         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6674         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6675         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6676         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6677         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6678
6679         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6680                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6681                 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6682         }
6683
6684         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6685                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6686                 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6687         }
6688
6689         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6690                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6691                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6692         }
6693
6694         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6695                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6696                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6697         }
6698
6699         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6700         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6701         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6702         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6703 }
6704
6705 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6706                                           void *ras_error_status)
6707 {
6708         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6709         uint32_t sec_count = 0, ded_count = 0;
6710         uint32_t i, j, k;
6711         uint32_t reg_value;
6712
6713         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6714                 return;
6715
6716         err_data->ue_count = 0;
6717         err_data->ce_count = 0;
6718
6719         mutex_lock(&adev->grbm_idx_mutex);
6720
6721         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6722                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6723                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6724                                 amdgpu_gfx_select_se_sh(adev, j, 0, k);
6725                                 reg_value =
6726                                         RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6727                                 if (reg_value)
6728                                         gfx_v9_0_ras_error_count(adev,
6729                                                 &gfx_v9_0_edc_counter_regs[i],
6730                                                 j, k, reg_value,
6731                                                 &sec_count, &ded_count);
6732                         }
6733                 }
6734         }
6735
6736         err_data->ce_count += sec_count;
6737         err_data->ue_count += ded_count;
6738
6739         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6740         mutex_unlock(&adev->grbm_idx_mutex);
6741
6742         gfx_v9_0_query_utc_edc_status(adev, err_data);
6743 }
6744
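/* Emit an ACQUIRE_MEM that invalidates the shader I$/K$ and TCL1 and
 * flushes/invalidates the TC over the full address range.
 */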
6745 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6746 {
6747         const unsigned int cp_coher_cntl =
6748                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6749                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6750                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6751                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6752                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6753
6754         /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6755         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6756         amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6757         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6758         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6759         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6760         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6761         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6762 }
6763
6764 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6765                                         uint32_t pipe, bool enable)
6766 {
6767         struct amdgpu_device *adev = ring->adev;
6768         uint32_t val;
6769         uint32_t wcl_cs_reg;
6770
6771         /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all the same */
6772         val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6773
6774         switch (pipe) {
6775         case 0:
6776                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6777                 break;
6778         case 1:
6779                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6780                 break;
6781         case 2:
6782                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6783                 break;
6784         case 3:
6785                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6786                 break;
6787         default:
6788                 DRM_DEBUG("invalid pipe %d\n", pipe);
6789                 return;
6790         }
6791
6792         amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6793 }
6794
6795 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6796 {
6797         struct amdgpu_device *adev = ring->adev;
6798         uint32_t val;
6799         int i;
6800
6802         /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
6803          * limit the number of gfx waves. Writing a 5-bit value (0x1f) makes
6804          * sure gfx only gets around 25% of the GPU resources.
6805          */
6806         val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6807         amdgpu_ring_emit_wreg(ring,
6808                               SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6809                               val);
6810
6811         /* Restrict waves for normal/low priority compute queues as well
6812          * to get the best QoS for high priority compute jobs.
6813          *
6814          * amdgpu controls only the 1st ME (CS pipes 0-3).
6815          */
6816         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6817                 if (i != ring->pipe)
6818                         gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6819
6820         }
6821 }
6822
6823 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6824         .name = "gfx_v9_0",
6825         .early_init = gfx_v9_0_early_init,
6826         .late_init = gfx_v9_0_late_init,
6827         .sw_init = gfx_v9_0_sw_init,
6828         .sw_fini = gfx_v9_0_sw_fini,
6829         .hw_init = gfx_v9_0_hw_init,
6830         .hw_fini = gfx_v9_0_hw_fini,
6831         .suspend = gfx_v9_0_suspend,
6832         .resume = gfx_v9_0_resume,
6833         .is_idle = gfx_v9_0_is_idle,
6834         .wait_for_idle = gfx_v9_0_wait_for_idle,
6835         .soft_reset = gfx_v9_0_soft_reset,
6836         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6837         .set_powergating_state = gfx_v9_0_set_powergating_state,
6838         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6839 };
6840
6841 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6842         .type = AMDGPU_RING_TYPE_GFX,
6843         .align_mask = 0xff,
6844         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6845         .support_64bit_ptrs = true,
6846         .secure_submission_supported = true,
6847         .vmhub = AMDGPU_GFXHUB_0,
6848         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6849         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6850         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6851         .emit_frame_size = /* 242 maximum in total if 16 IBs */
6852                 5 +  /* COND_EXEC */
6853                 7 +  /* PIPELINE_SYNC */
6854                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6855                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6856                 2 + /* VM_FLUSH */
6857                 8 +  /* FENCE for VM_FLUSH */
6858                 20 + /* GDS switch */
6859                 4 + /* double SWITCH_BUFFER,
6860                      * the first COND_EXEC jumps to the place just
6861                      * prior to this double SWITCH_BUFFER */
6862                 5 + /* COND_EXEC */
6863                 7 +      /*     HDP_flush */
6864                 4 +      /*     VGT_flush */
6865                 14 + /* CE_META */
6866                 31 + /* DE_META */
6867                 3 + /* CNTX_CTRL */
6868                 5 + /* HDP_INVL */
6869                 8 + 8 + /* FENCE x2 */
6870                 2 + /* SWITCH_BUFFER */
6871                 7, /* gfx_v9_0_emit_mem_sync */
6872         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6873         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6874         .emit_fence = gfx_v9_0_ring_emit_fence,
6875         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6876         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6877         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6878         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6879         .test_ring = gfx_v9_0_ring_test_ring,
6880         .insert_nop = amdgpu_ring_insert_nop,
6881         .pad_ib = amdgpu_ring_generic_pad_ib,
6882         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6883         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6884         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6885         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6886         .preempt_ib = gfx_v9_0_ring_preempt_ib,
6887         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6888         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6889         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6890         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6891         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6892         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6893 };
6894
6895 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
6896         .type = AMDGPU_RING_TYPE_GFX,
6897         .align_mask = 0xff,
6898         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6899         .support_64bit_ptrs = true,
6900         .secure_submission_supported = true,
6901         .vmhub = AMDGPU_GFXHUB_0,
6902         .get_rptr = amdgpu_sw_ring_get_rptr_gfx,
6903         .get_wptr = amdgpu_sw_ring_get_wptr_gfx,
6904         .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
6905         .emit_frame_size = /* 242 maximum in total if 16 IBs */
6906                 5 +  /* COND_EXEC */
6907                 7 +  /* PIPELINE_SYNC */
6908                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6909                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6910                 2 + /* VM_FLUSH */
6911                 8 +  /* FENCE for VM_FLUSH */
6912                 20 + /* GDS switch */
6913                 4 + /* double SWITCH_BUFFER,
6914                      * the first COND_EXEC jumps to the place just
6915                      * prior to this double SWITCH_BUFFER
6916                      */
6917                 5 + /* COND_EXEC */
6918                 7 +      /*     HDP_flush */
6919                 4 +      /*     VGT_flush */
6920                 14 + /* CE_META */
6921                 31 + /* DE_META */
6922                 3 + /* CNTX_CTRL */
6923                 5 + /* HDP_INVL */
6924                 8 + 8 + /* FENCE x2 */
6925                 2 + /* SWITCH_BUFFER */
6926                 7, /* gfx_v9_0_emit_mem_sync */
6927         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6928         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6929         .emit_fence = gfx_v9_0_ring_emit_fence,
6930         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6931         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6932         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6933         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6934         .test_ring = gfx_v9_0_ring_test_ring,
6935         .test_ib = gfx_v9_0_ring_test_ib,
6936         .insert_nop = amdgpu_sw_ring_insert_nop,
6937         .pad_ib = amdgpu_ring_generic_pad_ib,
6938         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6939         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6940         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6941         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6942         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6943         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6944         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6945         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6946         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6947         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6948 };
6949
6950 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6951         .type = AMDGPU_RING_TYPE_COMPUTE,
6952         .align_mask = 0xff,
6953         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6954         .support_64bit_ptrs = true,
6955         .vmhub = AMDGPU_GFXHUB_0,
6956         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6957         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6958         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6959         .emit_frame_size =
6960                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6961                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6962                 5 + /* hdp invalidate */
6963                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6964                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6965                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6966                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6967                 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6968                 7 + /* gfx_v9_0_emit_mem_sync */
6969                 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6970                 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6971         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6972         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6973         .emit_fence = gfx_v9_0_ring_emit_fence,
6974         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6975         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6976         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6977         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6978         .test_ring = gfx_v9_0_ring_test_ring,
6979         .test_ib = gfx_v9_0_ring_test_ib,
6980         .insert_nop = amdgpu_ring_insert_nop,
6981         .pad_ib = amdgpu_ring_generic_pad_ib,
6982         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6983         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6984         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6985         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6986         .emit_wave_limit = gfx_v9_0_emit_wave_limit,
6987 };
6988
6989 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6990         .type = AMDGPU_RING_TYPE_KIQ,
6991         .align_mask = 0xff,
6992         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6993         .support_64bit_ptrs = true,
6994         .vmhub = AMDGPU_GFXHUB_0,
6995         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6996         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6997         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6998         .emit_frame_size =
6999                 20 + /* gfx_v9_0_ring_emit_gds_switch */
7000                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7001                 5 + /* hdp invalidate */
7002                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7003                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7004                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7005                 2 + /* gfx_v9_0_ring_emit_vm_flush */
7006                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7007         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7008         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7009         .test_ring = gfx_v9_0_ring_test_ring,
7010         .insert_nop = amdgpu_ring_insert_nop,
7011         .pad_ib = amdgpu_ring_generic_pad_ib,
7012         .emit_rreg = gfx_v9_0_ring_emit_rreg,
7013         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7014         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7015         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7016 };
7017
7018 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7019 {
7020         int i;
7021
7022         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7023
7024         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7025                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7026
7027         if (adev->gfx.num_gfx_rings) {
7028                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7029                         adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7030         }
7031
7032         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7033                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7034 }
7035
7036 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7037         .set = gfx_v9_0_set_eop_interrupt_state,
7038         .process = gfx_v9_0_eop_irq,
7039 };
7040
7041 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7042         .set = gfx_v9_0_set_priv_reg_fault_state,
7043         .process = gfx_v9_0_priv_reg_irq,
7044 };
7045
7046 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7047         .set = gfx_v9_0_set_priv_inst_fault_state,
7048         .process = gfx_v9_0_priv_inst_irq,
7049 };
7050
7051 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7052         .set = gfx_v9_0_set_cp_ecc_error_state,
7053         .process = amdgpu_gfx_cp_ecc_error_irq,
7054 };
7055
7057 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7058 {
7059         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7060         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7061
7062         adev->gfx.priv_reg_irq.num_types = 1;
7063         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7064
7065         adev->gfx.priv_inst_irq.num_types = 1;
7066         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7067
7068         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7069         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7070 }
7071
7072 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7073 {
7074         switch (adev->ip_versions[GC_HWIP][0]) {
7075         case IP_VERSION(9, 0, 1):
7076         case IP_VERSION(9, 2, 1):
7077         case IP_VERSION(9, 4, 0):
7078         case IP_VERSION(9, 2, 2):
7079         case IP_VERSION(9, 1, 0):
7080         case IP_VERSION(9, 4, 1):
7081         case IP_VERSION(9, 3, 0):
7082         case IP_VERSION(9, 4, 2):
7083                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7084                 break;
7085         default:
7086                 break;
7087         }
7088 }
7089
7090 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7091 {
7092         /* init asic gds info */
7093         switch (adev->ip_versions[GC_HWIP][0]) {
7094         case IP_VERSION(9, 0, 1):
7095         case IP_VERSION(9, 2, 1):
7096         case IP_VERSION(9, 4, 0):
7097                 adev->gds.gds_size = 0x10000;
7098                 break;
7099         case IP_VERSION(9, 2, 2):
7100         case IP_VERSION(9, 1, 0):
7101         case IP_VERSION(9, 4, 1):
7102                 adev->gds.gds_size = 0x1000;
7103                 break;
7104         case IP_VERSION(9, 4, 2):
7105                 /* Aldebaran removed all the GDS internal memory;
7106                  * only GWS opcodes such as barrier and semaphore
7107                  * are supported in the kernel */
7108                 adev->gds.gds_size = 0;
7109                 break;
7110         default:
7111                 adev->gds.gds_size = 0x10000;
7112                 break;
7113         }
7114
7115         switch (adev->ip_versions[GC_HWIP][0]) {
7116         case IP_VERSION(9, 0, 1):
7117         case IP_VERSION(9, 4, 0):
7118                 adev->gds.gds_compute_max_wave_id = 0x7ff;
7119                 break;
7120         case IP_VERSION(9, 2, 1):
7121                 adev->gds.gds_compute_max_wave_id = 0x27f;
7122                 break;
7123         case IP_VERSION(9, 2, 2):
7124         case IP_VERSION(9, 1, 0):
7125                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7126                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7127                 else
7128                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7129                 break;
7130         case IP_VERSION(9, 4, 1):
7131                 adev->gds.gds_compute_max_wave_id = 0xfff;
7132                 break;
7133         case IP_VERSION(9, 4, 2):
7134                 /* deprecated for Aldebaran, no usage at all */
7135                 adev->gds.gds_compute_max_wave_id = 0;
7136                 break;
7137         default:
7138                 /* this really depends on the chip */
7139                 adev->gds.gds_compute_max_wave_id = 0x7ff;
7140                 break;
7141         }
7142
7143         adev->gds.gws_size = 64;
7144         adev->gds.oa_size = 16;
7145 }
7146
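/* Program the user-controlled inactive CU mask for the currently selected
 * shader array.
 */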
7147 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7148                                                  u32 bitmap)
7149 {
7150         u32 data;
7151
7152         if (!bitmap)
7153                 return;
7154
7155         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7156         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7157
7158         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7159 }
7160
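/* Combine the fused (CC) and user-requested (GC_USER) inactive CU masks and
 * return the active CU bitmap for the currently selected SE/SH.
 */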
7161 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7162 {
7163         u32 data, mask;
7164
7165         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7166         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7167
7168         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7169         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7170
7171         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7172
7173         return (~data) & mask;
7174 }
7175
7176 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7177                                  struct amdgpu_cu_info *cu_info)
7178 {
7179         int i, j, k, counter, active_cu_number = 0;
7180         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7181         unsigned disable_masks[4 * 4];
7182
7183         if (!adev || !cu_info)
7184                 return -EINVAL;
7185
7186         /* The limit of 16 comes from the 4*4 bitmap array size, which
7187          * covers all gfx9 ASICs.
7188          */
7189         if (adev->gfx.config.max_shader_engines *
7190                 adev->gfx.config.max_sh_per_se > 16)
7191                 return -EINVAL;
7192
7193         amdgpu_gfx_parse_disable_cu(disable_masks,
7194                                     adev->gfx.config.max_shader_engines,
7195                                     adev->gfx.config.max_sh_per_se);
7196
7197         mutex_lock(&adev->grbm_idx_mutex);
7198         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7199                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7200                         mask = 1;
7201                         ao_bitmap = 0;
7202                         counter = 0;
7203                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
7204                         gfx_v9_0_set_user_cu_inactive_bitmap(
7205                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7206                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7207
7208                         /*
7209                          * The bitmap (and ao_cu_bitmap) in the cu_info structure
7210                          * is a 4x4 array, which suits the Vega ASICs with their
7211                          * 4*2 SE/SH layout.
7212                          * Arcturus, however, changed the SE/SH layout to 8*1.
7213                          * To minimize the impact, remap it onto the existing
7214                          * 4x4 bitmap array as follows:
7215                          *    SE4,SH0 --> bitmap[0][1]
7216                          *    SE5,SH0 --> bitmap[1][1]
7217                          *    SE6,SH0 --> bitmap[2][1]
7218                          *    SE7,SH0 --> bitmap[3][1]
7219                          */
7220                         cu_info->bitmap[i % 4][j + i / 4] = bitmap;
7221
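                             /* count the active CUs and build the always-on bitmap for this SA */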
7222                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7223                                 if (bitmap & mask) {
7224                                         if (counter < adev->gfx.config.max_cu_per_sh)
7225                                                 ao_bitmap |= mask;
7226                                         counter++;
7227                                 }
7228                                 mask <<= 1;
7229                         }
7230                         active_cu_number += counter;
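                             /* ao_cu_mask only has room for SE0/SE1 and SH0/SH1, 8 bits per SA */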
7231                         if (i < 2 && j < 2)
7232                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7233                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7234                 }
7235         }
7236         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7237         mutex_unlock(&adev->grbm_idx_mutex);
7238
7239         cu_info->number = active_cu_number;
7240         cu_info->ao_cu_mask = ao_cu_mask;
7241         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7242
7243         return 0;
7244 }
7245
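     /* IP block registration entry for the gfx v9.0 block */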
7246 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7247 {
7248         .type = AMD_IP_BLOCK_TYPE_GFX,
7249         .major = 9,
7250         .minor = 0,
7251         .rev = 0,
7252         .funcs = &gfx_v9_0_ip_funcs,
7253 };