/* drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c */
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

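/*
 * Locally defined PWR block registers; these appear not to be exported by
 * the register headers included above. They are used to check the RLC CGPG
 * enable and GFXOFF status bits.
 */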
#define mmPWR_MISC_CNTL_STATUS                                  0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

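/*
 * MODULE_FIRMWARE() does not load anything by itself; it records each
 * firmware file the driver may request so that userspace tooling (e.g.
 * initramfs generators) knows to bundle them.
 */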
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
120
#define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0

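/*
 * GFX RAS sub-block indices. The numbering mirrors the interface of the RAS
 * trusted application (TA) running on the PSP, which is why each hardware
 * block's range is delimited with explicit *_INDEX_START/*_INDEX_END markers.
 */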
enum ta_ras_gfx_subblock {
        /*CPC*/
        TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
        TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
        TA_RAS_BLOCK__GFX_CPC_UCODE,
        TA_RAS_BLOCK__GFX_DC_STATE_ME1,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
        TA_RAS_BLOCK__GFX_DC_STATE_ME2,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        /* CPF*/
        TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
        TA_RAS_BLOCK__GFX_CPF_TAG,
        TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
        /* CPG*/
        TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
        TA_RAS_BLOCK__GFX_CPG_TAG,
        TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
        /* GDS*/
        TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        /* SPI*/
        TA_RAS_BLOCK__GFX_SPI_SR_MEM,
        /* SQ*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_LDS_D,
        TA_RAS_BLOCK__GFX_SQ_LDS_I,
        TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
        /* SQC (3 ranges)*/
        TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        /* SQC range 0*/
        TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
                TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        /* SQC range 1*/
        TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        /* SQC range 2*/
        TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
        /* TA*/
        TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
        TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        /* TCA*/
        TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        /* TCC (5 sub-ranges)*/
        TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        /* TCC range 0*/
        TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
        TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        /* TCC range 1*/
        TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
                TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        /* TCC range 2*/
        TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
        TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
        TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
                TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        /* TCC range 3*/
        TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
                TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        /* TCC range 4*/
        TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
                TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
                TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
        /* TCI*/
        TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
        /* TCP*/
        TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
        TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
        TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
        TA_RAS_BLOCK__GFX_TCP_DB_RAM,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        /* TD*/
        TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
        TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        /* EA (3 sub-ranges)*/
        TA_RAS_BLOCK__GFX_EA_INDEX_START,
        /* EA range 0*/
        TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        /* EA range 1*/
        TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        /* EA range 2*/
        TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
        /* UTC VM L2 bank*/
        TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
        /* UTC VM walker*/
        TA_RAS_BLOCK__UTC_VML2_WALKER,
        /* UTC ATC L2 2MB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
        /* UTC ATC L2 4KB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
        TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
        unsigned char *name;
        int ta_subblock;
        int hw_supported_error_type;
        int sw_supported_error_type;
};

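/*
 * Pack the per-sub-block error-type capability masks. The bit layout appears
 * to follow enum amdgpu_ras_error_type: bit 0 parity, bit 1 single
 * correctable, bit 2 multi uncorrectable, bit 3 poison. (a, b, c, d) form the
 * hardware-supported mask and (g, e, h, f) the software-supported one. For
 * example, AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1)
 * packs hw_supported_error_type = 0xe and sw_supported_error_type = 0x6.
 */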
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
        [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
                #subblock,                                                     \
                TA_RAS_BLOCK__##subblock,                                      \
                ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
                (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
        }

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

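/*
 * Golden register settings, applied at init through
 * soc15_program_register_sequence(). Each SOC15_REG_GOLDEN_VALUE() entry
 * names an IP block, instance and register followed by an AND mask and an OR
 * value: the masked bits are cleared and replaced by the OR value while all
 * other bits keep their current contents.
 */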
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
};

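/*
 * Offsets of the RLC SRM index-control address/data registers relative to
 * the first register of each group, so the pairs can be programmed by loop
 * index.
 */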
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
                                          void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
                                     void *inject_if);

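/*
 * KIQ (kernel interface queue) helpers. Each builds one PM4 packet on the
 * KIQ ring asking the CP firmware to manage compute queues on the driver's
 * behalf: claim queue resources, map or unmap a queue's MQD, query queue
 * status, or invalidate TLBs for a given PASID.
 */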
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
                                uint64_t queue_mask)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring,
                PACKET3_SET_RESOURCES_VMID_MASK(0) |
                /* vmid_mask: 0, queue_type: 0 (KIQ) */
                PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
        amdgpu_ring_write(kiq_ring,
                        lower_32_bits(queue_mask));     /* queue mask lo */
        amdgpu_ring_write(kiq_ring,
                        upper_32_bits(queue_mask));     /* queue mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}

static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
                                 struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = kiq_ring->adev;
        uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
        uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
        /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
        amdgpu_ring_write(kiq_ring,
                         PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
                         PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
                         PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                         PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                         PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
                         /* queue_type: normal compute queue */
                         PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
                         /* alloc format: all_on_one_pipe */
                         PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
                         PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
                         /* num_queues: must be 1 */
                         PACKET3_MAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
        amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   enum amdgpu_unmap_queues_action action,
                                   u64 gpu_addr, u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
        amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
                          PACKET3_UNMAP_QUEUES_ACTION(action) |
                          PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
                          PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
                          PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

        if (action == PREEMPT_QUEUES_NO_UNMAP) {
                amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, seq);
        } else {
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
        }
}

static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   u64 addr,
                                   u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
        amdgpu_ring_write(kiq_ring,
                          PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
                          PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
                          PACKET3_QUERY_STATUS_COMMAND(2));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
                        PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
        amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
        amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
                                uint16_t pasid, uint32_t flush_type,
                                bool all_hub)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
                        PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
                        PACKET3_INVALIDATE_TLBS_PASID(pasid) |
                        PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
        .kiq_set_resources = gfx_v9_0_kiq_set_resources,
        .kiq_map_queues = gfx_v9_0_kiq_map_queues,
        .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
        .kiq_query_status = gfx_v9_0_kiq_query_status,
        .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
        .set_resources_size = 8,
        .map_queues_size = 7,
        .unmap_queues_size = 6,
        .query_status_size = 7,
        .invalidate_tlbs_size = 12,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
        adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
}

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_VEGA10:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg10,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg10));
                break;
        case CHIP_VEGA12:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1_vg12,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
                break;
        case CHIP_VEGA20:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg20,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg20));
                break;
        case CHIP_ARCTURUS:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_4_1_arct,
                                                ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
                break;
        case CHIP_RAVEN:
                soc15_program_register_sequence(adev, golden_settings_gc_9_1,
                                                ARRAY_SIZE(golden_settings_gc_9_1));
                if (adev->rev_id >= 8)
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv2,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv2));
                else
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv1,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv1));
                break;
        case CHIP_RENOIR:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_1_rn,
                                                ARRAY_SIZE(golden_settings_gc_9_1_rn));
                return; /* Renoir does not need the common golden settings */
        default:
                break;
        }

        if (adev->asic_type != CHIP_ARCTURUS)
                soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
                                                (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
        adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

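/*
 * PM4 helpers used by the ring functions below: WRITE_DATA writes an
 * immediate value to a register (optionally waiting for write confirmation),
 * and WAIT_REG_MEM polls a register or memory location until
 * (value & mask) == ref.
 */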
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
                                       bool wc, uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
                                WRITE_DATA_DST_SEL(0) |
                                (wc ? WR_CONFIRM : 0));
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
                                  int mem_space, int opt, uint32_t addr0,
                                  uint32_t addr1, uint32_t ref, uint32_t mask,
                                  uint32_t inv)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring,
                                 /* memory (1) or register (0) */
                                 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
                                 WAIT_REG_MEM_OPERATION(opt) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
                                 WAIT_REG_MEM_ENGINE(eng_sel)));

        if (mem_space)
                BUG_ON(addr0 & 0x3); /* Dword align */
        amdgpu_ring_write(ring, addr0);
        amdgpu_ring_write(ring, addr1);
        amdgpu_ring_write(ring, ref);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, inv); /* poll interval */
}

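/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, emit a packet
 * that writes 0xDEADBEEF to it, and poll until the new value lands or the
 * timeout expires.
 */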
969 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
970 {
971         struct amdgpu_device *adev = ring->adev;
972         uint32_t scratch;
973         uint32_t tmp = 0;
974         unsigned i;
975         int r;
976
977         r = amdgpu_gfx_scratch_get(adev, &scratch);
978         if (r)
979                 return r;
980
981         WREG32(scratch, 0xCAFEDEAD);
982         r = amdgpu_ring_alloc(ring, 3);
983         if (r)
984                 goto error_free_scratch;
985
986         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
987         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
988         amdgpu_ring_write(ring, 0xDEADBEEF);
989         amdgpu_ring_commit(ring);
990
991         for (i = 0; i < adev->usec_timeout; i++) {
992                 tmp = RREG32(scratch);
993                 if (tmp == 0xDEADBEEF)
994                         break;
995                 udelay(1);
996         }
997
998         if (i >= adev->usec_timeout)
999                 r = -ETIMEDOUT;
1000
1001 error_free_scratch:
1002         amdgpu_gfx_scratch_free(adev, scratch);
1003         return r;
1004 }
1005
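/*
 * IB-level test: the same write-and-poll idea as the ring test above, but the
 * WRITE_DATA packet is submitted through an indirect buffer targeting a
 * writeback slot, and completion is detected via the returned fence.
 */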
1006 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1007 {
1008         struct amdgpu_device *adev = ring->adev;
1009         struct amdgpu_ib ib;
1010         struct dma_fence *f = NULL;
1011
1012         unsigned index;
1013         uint64_t gpu_addr;
1014         uint32_t tmp;
1015         long r;
1016
1017         r = amdgpu_device_wb_get(adev, &index);
1018         if (r)
1019                 return r;
1020
1021         gpu_addr = adev->wb.gpu_addr + (index * 4);
1022         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1023         memset(&ib, 0, sizeof(ib));
1024         r = amdgpu_ib_get(adev, NULL, 16, &ib);
1025         if (r)
1026                 goto err1;
1027
1028         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1029         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1030         ib.ptr[2] = lower_32_bits(gpu_addr);
1031         ib.ptr[3] = upper_32_bits(gpu_addr);
1032         ib.ptr[4] = 0xDEADBEEF;
1033         ib.length_dw = 5;
1034
1035         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1036         if (r)
1037                 goto err2;
1038
1039         r = dma_fence_wait_timeout(f, false, timeout);
1040         if (r == 0) {
1041                 r = -ETIMEDOUT;
1042                 goto err2;
1043         } else if (r < 0) {
1044                 goto err2;
1045         }
1046
1047         tmp = adev->wb.wb[index];
1048         if (tmp == 0xDEADBEEF)
1049                 r = 0;
1050         else
1051                 r = -EINVAL;
1052
1053 err2:
1054         amdgpu_ib_free(adev, &ib, NULL);
1055         dma_fence_put(f);
1056 err1:
1057         amdgpu_device_wb_free(adev, index);
1058         return r;
1059 }
1060
1061
1062 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1063 {
1064         release_firmware(adev->gfx.pfp_fw);
1065         adev->gfx.pfp_fw = NULL;
1066         release_firmware(adev->gfx.me_fw);
1067         adev->gfx.me_fw = NULL;
1068         release_firmware(adev->gfx.ce_fw);
1069         adev->gfx.ce_fw = NULL;
1070         release_firmware(adev->gfx.rlc_fw);
1071         adev->gfx.rlc_fw = NULL;
1072         release_firmware(adev->gfx.mec_fw);
1073         adev->gfx.mec_fw = NULL;
1074         release_firmware(adev->gfx.mec2_fw);
1075         adev->gfx.mec2_fw = NULL;
1076
1077         kfree(adev->gfx.rlc.register_list_format);
1078 }
1079
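/*
 * Cache the RLC v2.1 extended ucode pieces from the firmware header:
 * versions, sizes and payload pointers for the save/restore list CNTL,
 * GPM and SRM blobs.
 */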
1080 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1081 {
1082         const struct rlc_firmware_header_v2_1 *rlc_hdr;
1083
1084         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1085         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1086         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1087         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1088         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1089         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1090         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1091         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1092         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1093         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1094         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1095         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1096         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1097         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1098                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1099 }
1100
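/*
 * Record whether the CP ME/MEC firmware meets the per-ASIC minimum versions
 * below; the resulting me/mec_fw_write_wait flags are presumably consulted
 * when deciding how register write-then-wait sequences are emitted.
 */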
1101 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1102 {
1103         adev->gfx.me_fw_write_wait = false;
1104         adev->gfx.mec_fw_write_wait = false;
1105
1106         if ((adev->gfx.mec_fw_version < 0x000001a5) ||
1107             (adev->gfx.mec_feature_version < 46) ||
1108             (adev->gfx.pfp_fw_version < 0x000000b7) ||
1109             (adev->gfx.pfp_feature_version < 46))
1110                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1111
1112         switch (adev->asic_type) {
1113         case CHIP_VEGA10:
1114                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1115                     (adev->gfx.me_feature_version >= 42) &&
1116                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1117                     (adev->gfx.pfp_feature_version >= 42))
1118                         adev->gfx.me_fw_write_wait = true;
1119
1120                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1121                     (adev->gfx.mec_feature_version >= 42))
1122                         adev->gfx.mec_fw_write_wait = true;
1123                 break;
1124         case CHIP_VEGA12:
1125                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1126                     (adev->gfx.me_feature_version >= 44) &&
1127                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1128                     (adev->gfx.pfp_feature_version >= 44))
1129                         adev->gfx.me_fw_write_wait = true;
1130
1131                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1132                     (adev->gfx.mec_feature_version >= 44))
1133                         adev->gfx.mec_fw_write_wait = true;
1134                 break;
1135         case CHIP_VEGA20:
1136                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1137                     (adev->gfx.me_feature_version >= 44) &&
1138                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1139                     (adev->gfx.pfp_feature_version >= 44))
1140                         adev->gfx.me_fw_write_wait = true;
1141
1142                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1143                     (adev->gfx.mec_feature_version >= 44))
1144                         adev->gfx.mec_fw_write_wait = true;
1145                 break;
1146         case CHIP_RAVEN:
1147                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1148                     (adev->gfx.me_feature_version >= 42) &&
1149                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1150                     (adev->gfx.pfp_feature_version >= 42))
1151                         adev->gfx.me_fw_write_wait = true;
1152
1153                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1154                     (adev->gfx.mec_feature_version >= 42))
1155                         adev->gfx.mec_fw_write_wait = true;
1156                 break;
1157         default:
1158                 break;
1159         }
1160 }
1161
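/*
 * Decide whether GFXOFF can stay enabled: on Raven it is disabled for parts
 * that are neither Raven2 (rev_id >= 0x8) nor Picasso (device 0x15d8) when
 * the SMU firmware is too old or the RLC v2.1 save/restore ucode is missing.
 * If GFXOFF survives, the matching GFX powergating flags are set.
 */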
1162 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1163 {
1164         switch (adev->asic_type) {
1165         case CHIP_VEGA10:
1166         case CHIP_VEGA12:
1167         case CHIP_VEGA20:
1168                 break;
1169         case CHIP_RAVEN:
1170                 if (!(adev->rev_id >= 0x8 ||
1171                       adev->pdev->device == 0x15d8) &&
1172                     (adev->pm.fw_version < 0x41e2b || /* not raven1 fresh */
1173                      !adev->gfx.rlc.is_rlc_v2_1)) /* without rlc save restore ucodes */
1174                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1175
1176                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1177                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1178                                 AMD_PG_SUPPORT_CP |
1179                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1180                 break;
1181         case CHIP_RENOIR:
1182                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1183                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1184                                 AMD_PG_SUPPORT_CP |
1185                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1186                 break;
1187         default:
1188                 break;
1189         }
1190 }
1191
1192 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1193                                           const char *chip_name)
1194 {
1195         char fw_name[30];
1196         int err;
1197         struct amdgpu_firmware_info *info = NULL;
1198         const struct common_firmware_header *header = NULL;
1199         const struct gfx_firmware_header_v1_0 *cp_hdr;
1200
1201         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1202         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1203         if (err)
1204                 goto out;
1205         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1206         if (err)
1207                 goto out;
1208         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1209         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1210         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1211
1212         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1213         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1214         if (err)
1215                 goto out;
1216         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1217         if (err)
1218                 goto out;
1219         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1220         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1221         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1222
1223         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1224         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1225         if (err)
1226                 goto out;
1227         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1228         if (err)
1229                 goto out;
1230         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1231         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1232         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1233
1234         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1235                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1236                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1237                 info->fw = adev->gfx.pfp_fw;
1238                 header = (const struct common_firmware_header *)info->fw->data;
1239                 adev->firmware.fw_size +=
1240                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1241
1242                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1243                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1244                 info->fw = adev->gfx.me_fw;
1245                 header = (const struct common_firmware_header *)info->fw->data;
1246                 adev->firmware.fw_size +=
1247                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1248
1249                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1250                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1251                 info->fw = adev->gfx.ce_fw;
1252                 header = (const struct common_firmware_header *)info->fw->data;
1253                 adev->firmware.fw_size +=
1254                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1255         }
1256
1257 out:
1258         if (err) {
1259                 dev_err(adev->dev,
1260                         "gfx9: Failed to load firmware \"%s\"\n",
1261                         fw_name);
1262                 release_firmware(adev->gfx.pfp_fw);
1263                 adev->gfx.pfp_fw = NULL;
1264                 release_firmware(adev->gfx.me_fw);
1265                 adev->gfx.me_fw = NULL;
1266                 release_firmware(adev->gfx.ce_fw);
1267                 adev->gfx.ce_fw = NULL;
1268         }
1269         return err;
1270 }
1271
1272 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1273                                           const char *chip_name)
1274 {
1275         char fw_name[30];
1276         int err;
1277         struct amdgpu_firmware_info *info = NULL;
1278         const struct common_firmware_header *header = NULL;
1279         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1280         unsigned int *tmp = NULL;
1281         unsigned int i = 0;
1282         uint16_t version_major;
1283         uint16_t version_minor;
1284         uint32_t smu_version;
1285
1286         /*
1287          * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1288          * instead of picasso_rlc.bin.
1289          * Detection:
1290          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1291          *          or revision >= 0xD8 && revision <= 0xDF
1292          * otherwise it is PCO FP5.
1293          */
1294         if (!strcmp(chip_name, "picasso") &&
1295                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1296                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1297                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1298         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1299                 (smu_version >= 0x41e2b))
1300                 /*
1301                  * SMC is loaded by SBIOS on APUs, so the SMU version can be read directly.
1302                  */
1303                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1304         else
1305                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1306         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1307         if (err)
1308                 goto out;
1309         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
             if (err)
                     goto out;
1310         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1311
1312         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1313         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1314         if (version_major == 2 && version_minor == 1)
1315                 adev->gfx.rlc.is_rlc_v2_1 = true;
1316
1317         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1318         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1319         adev->gfx.rlc.save_and_restore_offset =
1320                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1321         adev->gfx.rlc.clear_state_descriptor_offset =
1322                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1323         adev->gfx.rlc.avail_scratch_ram_locations =
1324                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1325         adev->gfx.rlc.reg_restore_list_size =
1326                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1327         adev->gfx.rlc.reg_list_format_start =
1328                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1329         adev->gfx.rlc.reg_list_format_separate_start =
1330                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1331         adev->gfx.rlc.starting_offsets_start =
1332                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1333         adev->gfx.rlc.reg_list_format_size_bytes =
1334                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1335         adev->gfx.rlc.reg_list_size_bytes =
1336                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1337         adev->gfx.rlc.register_list_format =
1338                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1339                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1340         if (!adev->gfx.rlc.register_list_format) {
1341                 err = -ENOMEM;
1342                 goto out;
1343         }
1344
1345         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1346                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1347         for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1348                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1349
1350         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1351
1352         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1353                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1354         for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1355                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1356
1357         if (adev->gfx.rlc.is_rlc_v2_1)
1358                 gfx_v9_0_init_rlc_ext_microcode(adev);
1359
1360         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1361                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1362                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1363                 info->fw = adev->gfx.rlc_fw;
1364                 header = (const struct common_firmware_header *)info->fw->data;
1365                 adev->firmware.fw_size +=
1366                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1367
1368                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1369                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1370                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1371                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1372                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1373                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1374                         info->fw = adev->gfx.rlc_fw;
1375                         adev->firmware.fw_size +=
1376                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1377
1378                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1379                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1380                         info->fw = adev->gfx.rlc_fw;
1381                         adev->firmware.fw_size +=
1382                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1383
1384                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1385                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1386                         info->fw = adev->gfx.rlc_fw;
1387                         adev->firmware.fw_size +=
1388                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1389                 }
1390         }
1391
1392 out:
1393         if (err) {
1394                 dev_err(adev->dev,
1395                         "gfx9: Failed to load firmware \"%s\"\n",
1396                         fw_name);
1397                 release_firmware(adev->gfx.rlc_fw);
1398                 adev->gfx.rlc_fw = NULL;
1399         }
1400         return err;
1401 }
1402
1403 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1404                                           const char *chip_name)
1405 {
1406         char fw_name[30];
1407         int err;
1408         struct amdgpu_firmware_info *info = NULL;
1409         const struct common_firmware_header *header = NULL;
1410         const struct gfx_firmware_header_v1_0 *cp_hdr;
1411
1412         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1413         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1414         if (err)
1415                 goto out;
1416         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1417         if (err)
1418                 goto out;
1419         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1420         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1421         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1422
1423
1424         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1425         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1426         if (!err) {
1427                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1428                 if (err)
1429                         goto out;
1430                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1431                         adev->gfx.mec2_fw->data;
1432                 adev->gfx.mec2_fw_version =
1433                         le32_to_cpu(cp_hdr->header.ucode_version);
1434                 adev->gfx.mec2_feature_version =
1435                         le32_to_cpu(cp_hdr->ucode_feature_version);
1436         } else {
1437                 err = 0;
1438                 adev->gfx.mec2_fw = NULL;
1439         }
1440
1441         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1442                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1443                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1444                 info->fw = adev->gfx.mec_fw;
1445                 header = (const struct common_firmware_header *)info->fw->data;
1446                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1447                 adev->firmware.fw_size +=
1448                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1449
1450                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1451                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1452                 info->fw = adev->gfx.mec_fw;
1453                 adev->firmware.fw_size +=
1454                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1455
1456                 if (adev->gfx.mec2_fw) {
1457                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1458                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1459                         info->fw = adev->gfx.mec2_fw;
1460                         header = (const struct common_firmware_header *)info->fw->data;
1461                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1462                         adev->firmware.fw_size +=
1463                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1464
1465                         /* TODO: Determine if MEC2 JT FW loading can be
1466                          * removed for all GFX v9 ASICs and above */
1467                         if (adev->asic_type != CHIP_ARCTURUS &&
1468                             adev->asic_type != CHIP_RENOIR) {
1469                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1470                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1471                                 info->fw = adev->gfx.mec2_fw;
1472                                 adev->firmware.fw_size +=
1473                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1474                                         PAGE_SIZE);
1475                         }
1476                 }
1477         }
1478
1479 out:
1480         gfx_v9_0_check_if_need_gfxoff(adev);
1481         gfx_v9_0_check_fw_write_wait(adev);
1482         if (err) {
1483                 dev_err(adev->dev,
1484                         "gfx9: Failed to load firmware \"%s\"\n",
1485                         fw_name);
1486                 release_firmware(adev->gfx.mec_fw);
1487                 adev->gfx.mec_fw = NULL;
1488                 release_firmware(adev->gfx.mec2_fw);
1489                 adev->gfx.mec2_fw = NULL;
1490         }
1491         return err;
1492 }
1493
1494 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1495 {
1496         const char *chip_name;
1497         int r;
1498
1499         DRM_DEBUG("\n");
1500
1501         switch (adev->asic_type) {
1502         case CHIP_VEGA10:
1503                 chip_name = "vega10";
1504                 break;
1505         case CHIP_VEGA12:
1506                 chip_name = "vega12";
1507                 break;
1508         case CHIP_VEGA20:
1509                 chip_name = "vega20";
1510                 break;
1511         case CHIP_RAVEN:
1512                 if (adev->rev_id >= 8)
1513                         chip_name = "raven2";
1514                 else if (adev->pdev->device == 0x15d8)
1515                         chip_name = "picasso";
1516                 else
1517                         chip_name = "raven";
1518                 break;
1519         case CHIP_ARCTURUS:
1520                 chip_name = "arcturus";
1521                 break;
1522         case CHIP_RENOIR:
1523                 chip_name = "renoir";
1524                 break;
1525         default:
1526                 BUG();
1527         }
1528
1529         /* No CPG in Arcturus */
1530         if (adev->asic_type != CHIP_ARCTURUS) {
1531                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1532                 if (r)
1533                         return r;
1534         }
1535
1536         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1537         if (r)
1538                 return r;
1539
1540         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1541         if (r)
1542                 return r;
1543
1544         return r;
1545 }
1546
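/*
 * Size of the clear-state buffer in dwords: begin-clear-state preamble (2) +
 * context control (3) + a SET_CONTEXT_REG header pair plus payload for each
 * extent + end-clear-state preamble (2) + CLEAR_STATE packet (2).
 */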
1547 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1548 {
1549         u32 count = 0;
1550         const struct cs_section_def *sect = NULL;
1551         const struct cs_extent_def *ext = NULL;
1552
1553         /* begin clear state */
1554         count += 2;
1555         /* context control state */
1556         count += 3;
1557
1558         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1559                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1560                         if (sect->id == SECT_CONTEXT)
1561                                 count += 2 + ext->reg_count;
1562                         else
1563                                 return 0;
1564                 }
1565         }
1566
1567         /* end clear state */
1568         count += 2;
1569         /* clear state */
1570         count += 2;
1571
1572         return count;
1573 }
1574
1575 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1576                                     volatile u32 *buffer)
1577 {
1578         u32 count = 0, i;
1579         const struct cs_section_def *sect = NULL;
1580         const struct cs_extent_def *ext = NULL;
1581
1582         if (adev->gfx.rlc.cs_data == NULL)
1583                 return;
1584         if (buffer == NULL)
1585                 return;
1586
1587         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1588         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1589
1590         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1591         buffer[count++] = cpu_to_le32(0x80000000);
1592         buffer[count++] = cpu_to_le32(0x80000000);
1593
1594         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1595                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1596                         if (sect->id == SECT_CONTEXT) {
1597                                 buffer[count++] =
1598                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1599                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1600                                                 PACKET3_SET_CONTEXT_REG_START);
1601                                 for (i = 0; i < ext->reg_count; i++)
1602                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1603                         } else {
1604                                 return;
1605                         }
1606                 }
1607         }
1608
1609         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1610         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1611
1612         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1613         buffer[count++] = cpu_to_le32(0);
1614 }
1615
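/*
 * Program the always-on CU masks per SE/SH: APUs keep 4 CUs always on,
 * Vega12 keeps 8 and other ASICs 12; the first two CUs are additionally
 * written to RLC_PG_ALWAYS_ON_CU_MASK, presumably for powergating.
 */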
1616 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1617 {
1618         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1619         uint32_t pg_always_on_cu_num = 2;
1620         uint32_t always_on_cu_num;
1621         uint32_t i, j, k;
1622         uint32_t mask, cu_bitmap, counter;
1623
1624         if (adev->flags & AMD_IS_APU)
1625                 always_on_cu_num = 4;
1626         else if (adev->asic_type == CHIP_VEGA12)
1627                 always_on_cu_num = 8;
1628         else
1629                 always_on_cu_num = 12;
1630
1631         mutex_lock(&adev->grbm_idx_mutex);
1632         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1633                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1634                         mask = 1;
1635                         cu_bitmap = 0;
1636                         counter = 0;
1637                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1638
1639                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1640                                 if (cu_info->bitmap[i][j] & mask) {
1641                                         if (counter == pg_always_on_cu_num)
1642                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1643                                         if (counter < always_on_cu_num)
1644                                                 cu_bitmap |= mask;
1645                                         else
1646                                                 break;
1647                                         counter++;
1648                                 }
1649                                 mask <<= 1;
1650                         }
1651
1652                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1653                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1654                 }
1655         }
1656         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1657         mutex_unlock(&adev->grbm_idx_mutex);
1658 }
1659
1660 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1661 {
1662         uint32_t data;
1663
1664         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1665         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1666         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1667         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1668         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1669
1670         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1671         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1672
1673         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1674         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1675
1676         mutex_lock(&adev->grbm_idx_mutex);
1677         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1678         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1679         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1680
1681         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1682         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1683         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1684         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1685         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1686
1687         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1688         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1689         data &= 0x0000FFFF;
1690         data |= 0x00C00000;
1691         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1692
1693         /*
1694          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1695          * programmed in gfx_v9_0_init_always_on_cu_mask()
1696          */
1697
1698         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1699          * but is used here for RLC_LB_CNTL configuration */
1700         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1701         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1702         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1703         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1704         mutex_unlock(&adev->grbm_idx_mutex);
1705
1706         gfx_v9_0_init_always_on_cu_mask(adev);
1707 }
1708
1709 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1710 {
1711         uint32_t data;
1712
1713         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1714         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1715         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1716         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1717         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1718
1719         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1720         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1721
1722         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1723         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1724
1725         mutex_lock(&adev->grbm_idx_mutex);
1726         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1727         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1728         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1729
1730         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1731         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1732         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1733         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1734         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1735
1736         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1737         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1738         data &= 0x0000FFFF;
1739         data |= 0x00C00000;
1740         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1741
1742         /*
1743          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1744          * programmed in gfx_v9_0_init_always_on_cu_mask()
1745          */
1746
1747         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1748          * but is used here for RLC_LB_CNTL configuration */
1749         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1750         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1751         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1752         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1753         mutex_unlock(&adev->grbm_idx_mutex);
1754
1755         gfx_v9_0_init_always_on_cu_mask(adev);
1756 }
1757
1758 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1759 {
1760         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1761 }
1762
1763 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1764 {
1765         return 5;
1766 }
1767
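/*
 * RLC software init: build the clear-state BO from gfx9_cs_data, allocate
 * the CP table (jump tables plus GDS backup) on Raven/Renoir, and apply the
 * per-ASIC LBPW programming where applicable.
 */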
1768 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1769 {
1770         const struct cs_section_def *cs_data;
1771         int r;
1772
1773         adev->gfx.rlc.cs_data = gfx9_cs_data;
1774
1775         cs_data = adev->gfx.rlc.cs_data;
1776
1777         if (cs_data) {
1778                 /* init clear state block */
1779                 r = amdgpu_gfx_rlc_init_csb(adev);
1780                 if (r)
1781                         return r;
1782         }
1783
1784         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1785                 /* TODO: double check the cp_table_size for RV */
1786                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1787                 r = amdgpu_gfx_rlc_init_cpt(adev);
1788                 if (r)
1789                         return r;
1790         }
1791
1792         switch (adev->asic_type) {
1793         case CHIP_RAVEN:
1794                 gfx_v9_0_init_lbpw(adev);
1795                 break;
1796         case CHIP_VEGA20:
1797                 gfx_v9_4_init_lbpw(adev);
1798                 break;
1799         default:
1800                 break;
1801         }
1802
1803         return 0;
1804 }
1805
1806 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1807 {
1808         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1809         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1810 }
1811
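/*
 * MEC init: acquire the driver-owned compute queues, allocate one
 * GFX9_MEC_HPD_SIZE HPD/EOP slot per compute ring in VRAM, and stage the
 * MEC ucode in a GTT BO for later loading.
 */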
1812 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1813 {
1814         int r;
1815         u32 *hpd;
1816         const __le32 *fw_data;
1817         unsigned fw_size;
1818         u32 *fw;
1819         size_t mec_hpd_size;
1820
1821         const struct gfx_firmware_header_v1_0 *mec_hdr;
1822
1823         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1824
1825         /* take ownership of the relevant compute queues */
1826         amdgpu_gfx_compute_queue_acquire(adev);
1827         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1828
1829         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1830                                       AMDGPU_GEM_DOMAIN_VRAM,
1831                                       &adev->gfx.mec.hpd_eop_obj,
1832                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1833                                       (void **)&hpd);
1834         if (r) {
1835                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1836                 gfx_v9_0_mec_fini(adev);
1837                 return r;
1838         }
1839
1840         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1841
1842         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1843         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1844
1845         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1846
1847         fw_data = (const __le32 *)
1848                 (adev->gfx.mec_fw->data +
1849                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1850         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1851
1852         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1853                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1854                                       &adev->gfx.mec.mec_fw_obj,
1855                                       &adev->gfx.mec.mec_fw_gpu_addr,
1856                                       (void **)&fw);
1857         if (r) {
1858                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1859                 gfx_v9_0_mec_fini(adev);
1860                 return r;
1861         }
1862
1863         memcpy(fw, fw_data, fw_size);
1864
1865         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1866         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1867
1868         return 0;
1869 }
1870
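/*
 * Read a wave-context register through the SQ indirect interface: program
 * SQ_IND_INDEX with the wave/SIMD/register address (FORCE_READ set) and read
 * the value back from SQ_IND_DATA.  wave_read_regs() below additionally sets
 * AUTO_INCR to stream consecutive registers.
 */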
1871 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1872 {
1873         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1874                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1875                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1876                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1877                 (SQ_IND_INDEX__FORCE_READ_MASK));
1878         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1879 }
1880
1881 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1882                            uint32_t wave, uint32_t thread,
1883                            uint32_t regno, uint32_t num, uint32_t *out)
1884 {
1885         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1886                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1887                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1888                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1889                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1890                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1891                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1892         while (num--)
1893                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1894 }
1895
1896 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1897 {
1898         /* type 1 wave data */
1899         dst[(*no_fields)++] = 1;
1900         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1901         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1902         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1903         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1904         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1905         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1906         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1907         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1908         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1909         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1910         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1911         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1912         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1913         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1914 }
1915
1916 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1917                                      uint32_t wave, uint32_t start,
1918                                      uint32_t size, uint32_t *dst)
1919 {
1920         wave_read_regs(
1921                 adev, simd, wave, 0,
1922                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1923 }
1924
1925 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1926                                      uint32_t wave, uint32_t thread,
1927                                      uint32_t start, uint32_t size,
1928                                      uint32_t *dst)
1929 {
1930         wave_read_regs(
1931                 adev, simd, wave, thread,
1932                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1933 }
1934
1935 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1936                                   u32 me, u32 pipe, u32 q, u32 vm)
1937 {
1938         soc15_grbm_select(adev, me, pipe, q, vm);
1939 }
1940
1941 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1942         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1943         .select_se_sh = &gfx_v9_0_select_se_sh,
1944         .read_wave_data = &gfx_v9_0_read_wave_data,
1945         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1946         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1947         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1948         .ras_error_inject = &gfx_v9_0_ras_error_inject,
1949         .query_ras_error_count = &gfx_v9_0_query_ras_error_count
1950 };
1951
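/*
 * Per-ASIC gfx configuration: FIFO sizes plus a golden GB_ADDR_CONFIG value
 * (read back from hardware and patched on Vega20/Arcturus/Renoir), followed
 * by decoding the individual gb_addr_config fields for later use.
 */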
1952 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1953 {
1954         u32 gb_addr_config;
1955         int err;
1956
1957         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1958
1959         switch (adev->asic_type) {
1960         case CHIP_VEGA10:
1961                 adev->gfx.config.max_hw_contexts = 8;
1962                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1963                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1964                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1965                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1966                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1967                 break;
1968         case CHIP_VEGA12:
1969                 adev->gfx.config.max_hw_contexts = 8;
1970                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1971                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1972                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1973                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1974                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1975                 DRM_INFO("fix gfx.config for vega12\n");
1976                 break;
1977         case CHIP_VEGA20:
1978                 adev->gfx.config.max_hw_contexts = 8;
1979                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1980                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1981                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1982                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1983                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1984                 gb_addr_config &= ~0xf3e777ff;
1985                 gb_addr_config |= 0x22014042;
1986                 /* check vbios table if gpu info is not available */
1987                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1988                 if (err)
1989                         return err;
1990                 break;
1991         case CHIP_RAVEN:
1992                 adev->gfx.config.max_hw_contexts = 8;
1993                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1994                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1995                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1996                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1997                 if (adev->rev_id >= 8)
1998                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1999                 else
2000                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2001                 break;
2002         case CHIP_ARCTURUS:
2003                 adev->gfx.config.max_hw_contexts = 8;
2004                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2005                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2006                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2007                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2008                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2009                 gb_addr_config &= ~0xf3e777ff;
2010                 gb_addr_config |= 0x22014042;
2011                 break;
2012         case CHIP_RENOIR:
2013                 adev->gfx.config.max_hw_contexts = 8;
2014                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2015                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2016                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2017                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2018                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2019                 gb_addr_config &= ~0xf3e777ff;
2020                 gb_addr_config |= 0x22010042;
2021                 break;
2022         default:
2023                 BUG();
2024                 break;
2025         }
2026
2027         adev->gfx.config.gb_addr_config = gb_addr_config;
2028
2029         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2030                         REG_GET_FIELD(
2031                                         adev->gfx.config.gb_addr_config,
2032                                         GB_ADDR_CONFIG,
2033                                         NUM_PIPES);
2034
2035         adev->gfx.config.max_tile_pipes =
2036                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2037
2038         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2039                         REG_GET_FIELD(
2040                                         adev->gfx.config.gb_addr_config,
2041                                         GB_ADDR_CONFIG,
2042                                         NUM_BANKS);
2043         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2044                         REG_GET_FIELD(
2045                                         adev->gfx.config.gb_addr_config,
2046                                         GB_ADDR_CONFIG,
2047                                         MAX_COMPRESSED_FRAGS);
2048         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2049                         REG_GET_FIELD(
2050                                         adev->gfx.config.gb_addr_config,
2051                                         GB_ADDR_CONFIG,
2052                                         NUM_RB_PER_SE);
2053         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2054                         REG_GET_FIELD(
2055                                         adev->gfx.config.gb_addr_config,
2056                                         GB_ADDR_CONFIG,
2057                                         NUM_SHADER_ENGINES);
2058         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2059                         REG_GET_FIELD(
2060                                         adev->gfx.config.gb_addr_config,
2061                                         GB_ADDR_CONFIG,
2062                                         PIPE_INTERLEAVE_SIZE));
2063
2064         return 0;
2065 }
2066
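/*
 * Set up one compute ring: MEC N is exposed as ME N+1, each ring gets its
 * own doorbell (the << 1 apparently converts the 64-bit-unit doorbell index
 * to 32-bit doorbell slots) and a GFX9_MEC_HPD_SIZE slice of the shared EOP
 * buffer; EOP interrupts are routed per MEC pipe.
 */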
2067 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2068                                       int mec, int pipe, int queue)
2069 {
2070         int r;
2071         unsigned irq_type;
2072         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2073
2076         /* mec0 is me1 */
2077         ring->me = mec + 1;
2078         ring->pipe = pipe;
2079         ring->queue = queue;
2080
2081         ring->ring_obj = NULL;
2082         ring->use_doorbell = true;
2083         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2084         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2085                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2086         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2087
2088         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2089                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2090                 + ring->pipe;
2091
2092         /* type-2 packets are deprecated on MEC, use type-3 instead */
2093         r = amdgpu_ring_init(adev, ring, 1024,
2094                              &adev->gfx.eop_irq, irq_type);
2095         if (r)
2096                 return r;
2097
2099         return 0;
2100 }
2101
2102 static int gfx_v9_0_sw_init(void *handle)
2103 {
2104         int i, j, k, r, ring_id;
2105         struct amdgpu_ring *ring;
2106         struct amdgpu_kiq *kiq;
2107         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2108
2109         switch (adev->asic_type) {
2110         case CHIP_VEGA10:
2111         case CHIP_VEGA12:
2112         case CHIP_VEGA20:
2113         case CHIP_RAVEN:
2114         case CHIP_ARCTURUS:
2115         case CHIP_RENOIR:
2116                 adev->gfx.mec.num_mec = 2;
2117                 break;
2118         default:
2119                 adev->gfx.mec.num_mec = 1;
2120                 break;
2121         }
2122
2123         adev->gfx.mec.num_pipe_per_mec = 4;
2124         adev->gfx.mec.num_queue_per_pipe = 8;
2125
2126         /* EOP Event */
2127         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2128         if (r)
2129                 return r;
2130
2131         /* Privileged reg */
2132         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2133                               &adev->gfx.priv_reg_irq);
2134         if (r)
2135                 return r;
2136
2137         /* Privileged inst */
2138         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2139                               &adev->gfx.priv_inst_irq);
2140         if (r)
2141                 return r;
2142
2143         /* ECC error */
2144         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2145                               &adev->gfx.cp_ecc_error_irq);
2146         if (r)
2147                 return r;
2148
2149         /* FUE error */
2150         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2151                               &adev->gfx.cp_ecc_error_irq);
2152         if (r)
2153                 return r;
2154
2155         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2156
2157         gfx_v9_0_scratch_init(adev);
2158
2159         r = gfx_v9_0_init_microcode(adev);
2160         if (r) {
2161                 DRM_ERROR("Failed to load gfx firmware!\n");
2162                 return r;
2163         }
2164
2165         r = adev->gfx.rlc.funcs->init(adev);
2166         if (r) {
2167                 DRM_ERROR("Failed to init rlc BOs!\n");
2168                 return r;
2169         }
2170
2171         r = gfx_v9_0_mec_init(adev);
2172         if (r) {
2173                 DRM_ERROR("Failed to init MEC BOs!\n");
2174                 return r;
2175         }
2176
2177         /* set up the gfx ring */
2178         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2179                 ring = &adev->gfx.gfx_ring[i];
2180                 ring->ring_obj = NULL;
2181                 if (!i)
2182                         sprintf(ring->name, "gfx");
2183                 else
2184                         sprintf(ring->name, "gfx_%d", i);
2185                 ring->use_doorbell = true;
2186                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2187                 r = amdgpu_ring_init(adev, ring, 1024,
2188                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2189                 if (r)
2190                         return r;
2191         }
2192
2193         /* set up the compute queues - allocate horizontally across pipes */
2194         ring_id = 0;
2195         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2196                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2197                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2198                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2199                                         continue;
2200
2201                                 r = gfx_v9_0_compute_ring_init(adev,
2202                                                                ring_id,
2203                                                                i, k, j);
2204                                 if (r)
2205                                         return r;
2206
2207                                 ring_id++;
2208                         }
2209                 }
2210         }
2211
2212         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2213         if (r) {
2214                 DRM_ERROR("Failed to init KIQ BOs!\n");
2215                 return r;
2216         }
2217
2218         kiq = &adev->gfx.kiq;
2219         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2220         if (r)
2221                 return r;
2222
2223         /* create MQDs for all compute queues as well as the KIQ for the SRIOV case */
2224         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2225         if (r)
2226                 return r;
2227
2228         adev->gfx.ce_ram_size = 0x8000;
2229
2230         r = gfx_v9_0_gpu_early_init(adev);
2231         if (r)
2232                 return r;
2233
2234         return 0;
2235 }
2236
2237
2238 static int gfx_v9_0_sw_fini(void *handle)
2239 {
2240         int i;
2241         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2242
2243         amdgpu_gfx_ras_fini(adev);
2244
2245         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2246                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2247         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2248                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2249
2250         amdgpu_gfx_mqd_sw_fini(adev);
2251         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2252         amdgpu_gfx_kiq_fini(adev);
2253
2254         gfx_v9_0_mec_fini(adev);
2255         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2256         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2257                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2258                                 &adev->gfx.rlc.cp_table_gpu_addr,
2259                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2260         }
2261         gfx_v9_0_free_microcode(adev);
2262
2263         return 0;
2264 }
2265
2266
2267 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2268 {
2269         /* TODO */
2270 }
2271
2272 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2273 {
2274         u32 data;
2275
2276         if (instance == 0xffffffff)
2277                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2278         else
2279                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2280
2281         if (se_num == 0xffffffff)
2282                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2283         else
2284                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2285
2286         if (sh_num == 0xffffffff)
2287                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2288         else
2289                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2290
2291         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2292 }
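/*
 * Illustrative use of the selector above, mirroring what
 * gfx_v9_0_setup_rb() does below: a value of 0xffffffff broadcasts to
 * every SE/SH/instance, anything else targets a single one.  Callers
 * hold grbm_idx_mutex and must restore broadcast mode afterwards so
 * subsequent register writes reach all instances:
 *
 *	mutex_lock(&adev->grbm_idx_mutex);
 *	gfx_v9_0_select_se_sh(adev, se, sh, 0xffffffff);
 *	... per-SE/SH register reads/writes ...
 *	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 *	mutex_unlock(&adev->grbm_idx_mutex);
 */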
2293
2294 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2295 {
2296         u32 data, mask;
2297
2298         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2299         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2300
2301         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2302         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2303
2304         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2305                                          adev->gfx.config.max_sh_per_se);
2306
2307         return (~data) & mask;
2308 }
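/*
 * Worked example for the bitmap math above (hypothetical numbers): with
 * max_backends_per_se = 4 and max_sh_per_se = 1, the mask covers 4 bits
 * (0xf).  If the combined CC/GC_USER disable field reads 0b0010, the
 * returned active bitmap is (~0b0010) & 0xf = 0b1101, i.e. RB1 is
 * disabled and RBs 0, 2 and 3 are active.
 */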
2309
2310 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2311 {
2312         int i, j;
2313         u32 data;
2314         u32 active_rbs = 0;
2315         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2316                                         adev->gfx.config.max_sh_per_se;
2317
2318         mutex_lock(&adev->grbm_idx_mutex);
2319         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2320                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2321                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2322                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2323                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2324                                                rb_bitmap_width_per_sh);
2325                 }
2326         }
2327         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2328         mutex_unlock(&adev->grbm_idx_mutex);
2329
2330         adev->gfx.config.backend_enable_mask = active_rbs;
2331         adev->gfx.config.num_rbs = hweight32(active_rbs);
2332 }
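/*
 * Continuing the hypothetical numbers above (4 SEs, 1 SH per SE, 4 RBs
 * per SH): rb_bitmap_width_per_sh is 4, so each iteration deposits one
 * 4-bit per-SH bitmap into nibble (i * 1 + j) of active_rbs, and
 * num_rbs ends up as the popcount of the packed result.
 */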
2333
2334 #define DEFAULT_SH_MEM_BASES    (0x6000)
2335 #define FIRST_COMPUTE_VMID      (8)
2336 #define LAST_COMPUTE_VMID       (16)
2337 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2338 {
2339         int i;
2340         uint32_t sh_mem_config;
2341         uint32_t sh_mem_bases;
2342
2343         /*
2344          * Configure apertures:
2345          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2346          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2347          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2348          */
2349         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2350
2351         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2352                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2353                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2354
2355         mutex_lock(&adev->srbm_mutex);
2356         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2357                 soc15_grbm_select(adev, 0, 0, 0, i);
2358                 /* CP and shaders */
2359                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2360                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2361         }
2362         soc15_grbm_select(adev, 0, 0, 0, 0);
2363         mutex_unlock(&adev->srbm_mutex);
2364
2365         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2366            access. These should be enabled by FW for target VMIDs. */
2367         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2368                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2369                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2370                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2371                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2372         }
2373 }
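/*
 * Worked example for the aperture setup above: DEFAULT_SH_MEM_BASES is
 * 0x6000, so sh_mem_bases = 0x6000 | (0x6000 << 16) = 0x60006000.  The
 * low and high halves land in SH_MEM_BASES.PRIVATE_BASE and
 * SHARED_BASE, each of which supplies bits 63:48 of a 64-bit FSA
 * address (compare gfx_v9_0_constants_init(), which derives the same
 * fields from an address with a >> 48 shift), which yields the
 * 0x60000000'00000000 apertures documented in the comment block above.
 */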
2374
2375 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2376 {
2377         int vmid;
2378
2379         /*
2380          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2381          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2382          * the driver can enable them for graphics. VMID0 should maintain
2383          * access so that HWS firmware can save/restore entries.
2384          */
2385         for (vmid = 1; vmid < 16; vmid++) {
2386                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2387                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2388                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2389                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2390         }
2391 }
2392
2393 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2394 {
2395         u32 tmp;
2396         int i;
2397
2398         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2399
2400         gfx_v9_0_tiling_mode_table_init(adev);
2401
2402         gfx_v9_0_setup_rb(adev);
2403         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2404         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2405
2406         /* XXX SH_MEM regs */
2407         /* where to put LDS, scratch, GPUVM in FSA64 space */
2408         mutex_lock(&adev->srbm_mutex);
2409         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2410                 soc15_grbm_select(adev, 0, 0, 0, i);
2411                 /* CP and shaders */
2412                 if (i == 0) {
2413                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2414                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2415                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2416                                             !!amdgpu_noretry);
2417                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2418                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2419                 } else {
2420                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2421                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2422                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2423                                             !!amdgpu_noretry);
2424                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2425                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2426                                 (adev->gmc.private_aperture_start >> 48));
2427                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2428                                 (adev->gmc.shared_aperture_start >> 48));
2429                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2430                 }
2431         }
2432         soc15_grbm_select(adev, 0, 0, 0, 0);
2433
2434         mutex_unlock(&adev->srbm_mutex);
2435
2436         gfx_v9_0_init_compute_vmid(adev);
2437         gfx_v9_0_init_gds_vmid(adev);
2438 }
2439
2440 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2441 {
2442         u32 i, j, k;
2443         u32 mask;
2444
2445         mutex_lock(&adev->grbm_idx_mutex);
2446         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2447                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2448                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2449                         for (k = 0; k < adev->usec_timeout; k++) {
2450                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2451                                         break;
2452                                 udelay(1);
2453                         }
2454                         if (k == adev->usec_timeout) {
2455                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2456                                                       0xffffffff, 0xffffffff);
2457                                 mutex_unlock(&adev->grbm_idx_mutex);
2458                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2459                                          i, j);
2460                                 return;
2461                         }
2462                 }
2463         }
2464         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2465         mutex_unlock(&adev->grbm_idx_mutex);
2466
2467         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2468                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2469                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2470                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2471         for (k = 0; k < adev->usec_timeout; k++) {
2472                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2473                         break;
2474                 udelay(1);
2475         }
2476 }
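/*
 * Both loops above use the driver's standard bounded-poll idiom: up to
 * adev->usec_timeout iterations with udelay(1) between reads, i.e.
 * roughly usec_timeout microseconds of busy-waiting, with
 * k == adev->usec_timeout afterwards indicating the condition never
 * became true.
 */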
2477
2478 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2479                                                bool enable)
2480 {
2481         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2482
2483         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2484         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2485         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2486         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2487
2488         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2489 }
2490
2491 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2492 {
2493         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2494         /* csib */
2495         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2496                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2497         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2498                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2499         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2500                         adev->gfx.rlc.clear_state_size);
2501 }
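/*
 * The clear-state buffer address is split the usual way: the high
 * dword goes in CSIB_ADDR_HI and the low dword, with the two LSBs
 * masked off (the buffer is at least dword-aligned), in CSIB_ADDR_LO.
 * For instance, a GPU address of 0x0000000123456780 would be written
 * as HI = 0x1, LO = 0x23456780.
 */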
2502
2503 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2504                                 int indirect_offset,
2505                                 int list_size,
2506                                 int *unique_indirect_regs,
2507                                 int unique_indirect_reg_count,
2508                                 int *indirect_start_offsets,
2509                                 int *indirect_start_offsets_count,
2510                                 int max_start_offsets_count)
2511 {
2512         int idx;
2513
2514         for (; indirect_offset < list_size; indirect_offset++) {
2515                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2516                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2517                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2518
2519                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2520                         indirect_offset += 2;
2521
2522                         /* look for the matching index */
2523                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2524                                 if (unique_indirect_regs[idx] ==
2525                                         register_list_format[indirect_offset] ||
2526                                         !unique_indirect_regs[idx])
2527                                         break;
2528                         }
2529
2530                         BUG_ON(idx >= unique_indirect_reg_count);
2531
2532                         if (!unique_indirect_regs[idx])
2533                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2534
2535                         indirect_offset++;
2536                 }
2537         }
2538 }
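/*
 * As parsed above, the indirect part of the register list is a series
 * of blocks: each block is a run of 3-dword entries whose third dword
 * is an indirect register offset, terminated by a 0xFFFFFFFF dword.
 * The parser records where each block starts and dedups the register
 * offsets into unique_indirect_regs[]; a zero entry there marks an
 * unused slot that the next new offset can claim.
 */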
2539
2540 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2541 {
2542         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2543         int unique_indirect_reg_count = 0;
2544
2545         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2546         int indirect_start_offsets_count = 0;
2547
2548         int list_size = 0;
2549         int i = 0, j = 0;
2550         u32 tmp = 0;
2551
2552         u32 *register_list_format =
2553                 kmemdup(adev->gfx.rlc.register_list_format,
2554                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2555         if (!register_list_format)
2556                 return -ENOMEM;
2557
2558         /* setup unique_indirect_regs array and indirect_start_offsets array */
2559         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2560         gfx_v9_1_parse_ind_reg_list(register_list_format,
2561                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2562                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2563                                     unique_indirect_regs,
2564                                     unique_indirect_reg_count,
2565                                     indirect_start_offsets,
2566                                     &indirect_start_offsets_count,
2567                                     ARRAY_SIZE(indirect_start_offsets));
2568
2569         /* enable auto inc in case it is disabled */
2570         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2571         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2572         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2573
2574         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2575         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2576                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2577         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2578                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2579                         adev->gfx.rlc.register_restore[i]);
2580
2581         /* load indirect register */
2582         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2583                 adev->gfx.rlc.reg_list_format_start);
2584
2585         /* direct register portion */
2586         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2587                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2588                         register_list_format[i]);
2589
2590         /* indirect register portion */
2591         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2592                 if (register_list_format[i] == 0xFFFFFFFF) {
2593                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2594                         continue;
2595                 }
2596
2597                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2598                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2599
2600                 for (j = 0; j < unique_indirect_reg_count; j++) {
2601                         if (register_list_format[i] == unique_indirect_regs[j]) {
2602                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2603                                 break;
2604                         }
2605                 }
2606
2607                 BUG_ON(j >= unique_indirect_reg_count);
2608
2609                 i++;
2610         }
2611
2612         /* set save/restore list size */
2613         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2614         list_size = list_size >> 1;
2615         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2616                 adev->gfx.rlc.reg_restore_list_size);
2617         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2618
2619         /* write the starting offsets to RLC scratch ram */
2620         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2621                 adev->gfx.rlc.starting_offsets_start);
2622         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2623                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2624                        indirect_start_offsets[i]);
2625
2626         /* load unique indirect regs */
2627         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2628                 if (unique_indirect_regs[i] != 0) {
2629                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2630                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2631                                unique_indirect_regs[i] & 0x3FFFF);
2632
2633                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2634                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2635                                unique_indirect_regs[i] >> 20);
2636                 }
2637         }
2638
2639         kfree(register_list_format);
2640         return 0;
2641 }
2642
2643 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2644 {
2645         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2646 }
2647
2648 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2649                                              bool enable)
2650 {
2651         uint32_t data = 0;
2652         uint32_t default_data = 0;
2653
2654         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2655         if (enable) {
2656                 /* enable GFXIP control over CGPG */
2657                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2658                 if (default_data != data)
2659                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2660
2661                 /* update status */
2662                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2663                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2664                 if (default_data != data)
2665                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2666         } else {
2667                 /* restore GFXIP control over CGPG */
2668                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2669                 if (default_data != data)
2670                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2671         }
2672 }
2673
2674 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2675 {
2676         uint32_t data = 0;
2677
2678         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2679                               AMD_PG_SUPPORT_GFX_SMG |
2680                               AMD_PG_SUPPORT_GFX_DMG)) {
2681                 /* init IDLE_POLL_COUNT = 0x60 */
2682                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2683                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2684                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2685                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2686
2687                 /* init RLC PG Delay */
2688                 data = 0;
2689                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2690                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2691                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2692                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2693                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2694
2695                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2696                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2697                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2698                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2699
2700                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2701                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2702                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2703                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2704
2705                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2706                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2707
2708                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2709                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2710                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2711
2712                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2713         }
2714 }
2715
2716 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2717                                                 bool enable)
2718 {
2719         uint32_t data = 0;
2720         uint32_t default_data = 0;
2721
2722         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2723         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2724                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2725                              enable ? 1 : 0);
2726         if (default_data != data)
2727                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2728 }
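/*
 * This helper and the RLC_PG_CNTL helpers that follow all share one
 * read-modify-write pattern: snapshot the register, update a single
 * field via REG_SET_FIELD(), and write back only if the value actually
 * changed, so unchanged settings cost a register read but no MMIO
 * write.
 */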
2729
2730 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2731                                                 bool enable)
2732 {
2733         uint32_t data = 0;
2734         uint32_t default_data = 0;
2735
2736         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2737         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2738                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2739                              enable ? 1 : 0);
2740         if (default_data != data)
2741                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2742 }
2743
2744 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2745                                         bool enable)
2746 {
2747         uint32_t data = 0;
2748         uint32_t default_data = 0;
2749
2750         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2751         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2752                              CP_PG_DISABLE,
2753                              enable ? 0 : 1);
2754         if (default_data != data)
2755                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2756 }
2757
2758 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2759                                                 bool enable)
2760 {
2761         uint32_t data, default_data;
2762
2763         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2764         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2765                              GFX_POWER_GATING_ENABLE,
2766                              enable ? 1 : 0);
2767         if (default_data != data)
2768                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2769 }
2770
2771 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2772                                                 bool enable)
2773 {
2774         uint32_t data, default_data;
2775
2776         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2777         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2778                              GFX_PIPELINE_PG_ENABLE,
2779                              enable ? 1 : 0);
2780         if (default_data != data)
2781                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2782
2783         if (!enable)
2784                 /* read any GFX register to wake up GFX */
2785                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2786 }
2787
2788 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2789                                                        bool enable)
2790 {
2791         uint32_t data, default_data;
2792
2793         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2794         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2795                              STATIC_PER_CU_PG_ENABLE,
2796                              enable ? 1 : 0);
2797         if (default_data != data)
2798                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2799 }
2800
2801 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2802                                                 bool enable)
2803 {
2804         uint32_t data, default_data;
2805
2806         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2807         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2808                              DYN_PER_CU_PG_ENABLE,
2809                              enable ? 1 : 0);
2810         if (default_data != data)
2811                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2812 }
2813
2814 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2815 {
2816         gfx_v9_0_init_csb(adev);
2817
2818         /*
2819          * The RLC save/restore list is available starting with RLC v2_1,
2820          * and it's needed by the gfxoff feature.
2821          */
2822         if (adev->gfx.rlc.is_rlc_v2_1) {
2823                 if (adev->asic_type == CHIP_VEGA12 ||
2824                     (adev->asic_type == CHIP_RAVEN &&
2825                      adev->rev_id >= 8))
2826                         gfx_v9_1_init_rlc_save_restore_list(adev);
2827                 gfx_v9_0_enable_save_restore_machine(adev);
2828         }
2829
2830         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2831                               AMD_PG_SUPPORT_GFX_SMG |
2832                               AMD_PG_SUPPORT_GFX_DMG |
2833                               AMD_PG_SUPPORT_CP |
2834                               AMD_PG_SUPPORT_GDS |
2835                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2836                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2837                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2838                 gfx_v9_0_init_gfx_power_gating(adev);
2839         }
2840 }
2841
2842 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2843 {
2844         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2845         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2846         gfx_v9_0_wait_for_rlc_serdes(adev);
2847 }
2848
2849 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2850 {
2851         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2852         udelay(50);
2853         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2854         udelay(50);
2855 }
2856
2857 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2858 {
2859 #ifdef AMDGPU_RLC_DEBUG_RETRY
2860         u32 rlc_ucode_ver;
2861 #endif
2862
2863         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2864         udelay(50);
2865
2866         /* APUs (e.g. carrizo) enable the cp interrupt only after the cp is initialized */
2867         if (!(adev->flags & AMD_IS_APU)) {
2868                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2869                 udelay(50);
2870         }
2871
2872 #ifdef AMDGPU_RLC_DEBUG_RETRY
2873         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2874         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2875         if (rlc_ucode_ver == 0x108) {
2876                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2877                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2878                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2879                  * default is 0x9C4 to create a 100us interval */
2880                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2881                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2882                  * to disable the page fault retry interrupts, default is
2883                  * 0x100 (256) */
2884                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2885         }
2886 #endif
2887 }
2888
2889 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2890 {
2891         const struct rlc_firmware_header_v2_0 *hdr;
2892         const __le32 *fw_data;
2893         unsigned i, fw_size;
2894
2895         if (!adev->gfx.rlc_fw)
2896                 return -EINVAL;
2897
2898         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2899         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2900
2901         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2902                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2903         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2904
2905         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2906                         RLCG_UCODE_LOADING_START_ADDRESS);
2907         for (i = 0; i < fw_size; i++)
2908                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2909         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2910
2911         return 0;
2912 }
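/*
 * The MMIO load above follows the legacy ucode-streaming pattern: set
 * RLC_GPM_UCODE_ADDR to the load offset, stream the image one dword at
 * a time through RLC_GPM_UCODE_DATA (the hardware auto-increments the
 * address), and finish by writing the firmware version back to the
 * ADDR register.  The CP loaders below (PFP/CE/ME/MEC) use the same
 * convention with their respective UCODE_ADDR/UCODE_DATA pairs.
 */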
2913
2914 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2915 {
2916         int r;
2917
2918         if (amdgpu_sriov_vf(adev)) {
2919                 gfx_v9_0_init_csb(adev);
2920                 return 0;
2921         }
2922
2923         adev->gfx.rlc.funcs->stop(adev);
2924
2925         /* disable CG */
2926         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2927
2928         gfx_v9_0_init_pg(adev);
2929
2930         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2931                 /* legacy rlc firmware loading */
2932                 r = gfx_v9_0_rlc_load_microcode(adev);
2933                 if (r)
2934                         return r;
2935         }
2936
2937         switch (adev->asic_type) {
2938         case CHIP_RAVEN:
2939                 if (amdgpu_lbpw == 0)
2940                         gfx_v9_0_enable_lbpw(adev, false);
2941                 else
2942                         gfx_v9_0_enable_lbpw(adev, true);
2943                 break;
2944         case CHIP_VEGA20:
2945                 if (amdgpu_lbpw > 0)
2946                         gfx_v9_0_enable_lbpw(adev, true);
2947                 else
2948                         gfx_v9_0_enable_lbpw(adev, false);
2949                 break;
2950         default:
2951                 break;
2952         }
2953
2954         adev->gfx.rlc.funcs->start(adev);
2955
2956         return 0;
2957 }
2958
2959 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2960 {
2961         int i;
2962         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2963
2964         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2965         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2966         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2967         if (!enable) {
2968                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2969                         adev->gfx.gfx_ring[i].sched.ready = false;
2970         }
2971         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2972         udelay(50);
2973 }
2974
2975 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2976 {
2977         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2978         const struct gfx_firmware_header_v1_0 *ce_hdr;
2979         const struct gfx_firmware_header_v1_0 *me_hdr;
2980         const __le32 *fw_data;
2981         unsigned i, fw_size;
2982
2983         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2984                 return -EINVAL;
2985
2986         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2987                 adev->gfx.pfp_fw->data;
2988         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2989                 adev->gfx.ce_fw->data;
2990         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2991                 adev->gfx.me_fw->data;
2992
2993         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2994         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2995         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2996
2997         gfx_v9_0_cp_gfx_enable(adev, false);
2998
2999         /* PFP */
3000         fw_data = (const __le32 *)
3001                 (adev->gfx.pfp_fw->data +
3002                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3003         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3004         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3005         for (i = 0; i < fw_size; i++)
3006                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3007         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3008
3009         /* CE */
3010         fw_data = (const __le32 *)
3011                 (adev->gfx.ce_fw->data +
3012                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3013         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3014         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3015         for (i = 0; i < fw_size; i++)
3016                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3017         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3018
3019         /* ME */
3020         fw_data = (const __le32 *)
3021                 (adev->gfx.me_fw->data +
3022                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3023         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3024         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3025         for (i = 0; i < fw_size; i++)
3026                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3027         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3028
3029         return 0;
3030 }
3031
3032 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3033 {
3034         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3035         const struct cs_section_def *sect = NULL;
3036         const struct cs_extent_def *ext = NULL;
3037         int r, i, tmp;
3038
3039         /* init the CP */
3040         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3041         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3042
3043         gfx_v9_0_cp_gfx_enable(adev, true);
3044
3045         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3046         if (r) {
3047                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3048                 return r;
3049         }
3050
3051         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3052         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3053
3054         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3055         amdgpu_ring_write(ring, 0x80000000);
3056         amdgpu_ring_write(ring, 0x80000000);
3057
3058         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3059                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3060                         if (sect->id == SECT_CONTEXT) {
3061                                 amdgpu_ring_write(ring,
3062                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3063                                                ext->reg_count));
3064                                 amdgpu_ring_write(ring,
3065                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3066                                 for (i = 0; i < ext->reg_count; i++)
3067                                         amdgpu_ring_write(ring, ext->extent[i]);
3068                         }
3069                 }
3070         }
3071
3072         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3073         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3074
3075         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3076         amdgpu_ring_write(ring, 0);
3077
3078         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3079         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3080         amdgpu_ring_write(ring, 0x8000);
3081         amdgpu_ring_write(ring, 0x8000);
3082
3083         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3084         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3085                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3086         amdgpu_ring_write(ring, tmp);
3087         amdgpu_ring_write(ring, 0);
3088
3089         amdgpu_ring_commit(ring);
3090
3091         return 0;
3092 }
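/*
 * Dword accounting for the amdgpu_ring_alloc() call above:
 * gfx_v9_0_get_csb_size() covers the clear-state stream (preamble,
 * context control, SECT_CONTEXT extents and the CLEAR_STATE packet);
 * the extra "+ 4 + 3" appears to match the two trailing packets, i.e.
 * SET_BASE (1 header + 3 payload dwords) and SET_UCONFIG_REG
 * (1 header + 2 payload dwords).
 */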
3093
3094 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3095 {
3096         struct amdgpu_ring *ring;
3097         u32 tmp;
3098         u32 rb_bufsz;
3099         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3100
3101         /* Set the write pointer delay */
3102         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3103
3104         /* set the RB to use vmid 0 */
3105         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3106
3107         /* Set ring buffer size */
3108         ring = &adev->gfx.gfx_ring[0];
3109         rb_bufsz = order_base_2(ring->ring_size / 8);
3110         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3111         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3112 #ifdef __BIG_ENDIAN
3113         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3114 #endif
3115         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3116
3117         /* Initialize the ring buffer's write pointers */
3118         ring->wptr = 0;
3119         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3120         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3121
3122         /* set the wb address whether it's enabled or not */
3123         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3124         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3125         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3126
3127         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3128         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3129         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3130
3131         mdelay(1);
3132         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3133
3134         rb_addr = ring->gpu_addr >> 8;
3135         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3136         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3137
3138         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3139         if (ring->use_doorbell) {
3140                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3141                                     DOORBELL_OFFSET, ring->doorbell_index);
3142                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3143                                     DOORBELL_EN, 1);
3144         } else {
3145                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3146         }
3147         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3148
3149         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3150                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3151         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3152
3153         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3154                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3155
3156
3157         /* start the ring */
3158         gfx_v9_0_cp_gfx_start(adev);
3159         ring->sched.ready = true;
3160
3161         return 0;
3162 }
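/*
 * Sizing note for the ring setup above: RB_BUFSZ is a log2 encoding in
 * which the hardware reads the ring size as 2^(RB_BUFSZ + 1) dwords,
 * which is why the driver computes order_base_2(ring_size / 8) from
 * the size in bytes.  A 64 KiB ring, for example, gives
 * order_base_2(65536 / 8) = 13.
 */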
3163
3164 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3165 {
3166         int i;
3167
3168         if (enable) {
3169                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3170         } else {
3171                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3172                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3173                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3174                         adev->gfx.compute_ring[i].sched.ready = false;
3175                 adev->gfx.kiq.ring.sched.ready = false;
3176         }
3177         udelay(50);
3178 }
3179
3180 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3181 {
3182         const struct gfx_firmware_header_v1_0 *mec_hdr;
3183         const __le32 *fw_data;
3184         unsigned i;
3185         u32 tmp;
3186
3187         if (!adev->gfx.mec_fw)
3188                 return -EINVAL;
3189
3190         gfx_v9_0_cp_compute_enable(adev, false);
3191
3192         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3193         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3194
3195         fw_data = (const __le32 *)
3196                 (adev->gfx.mec_fw->data +
3197                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3198         tmp = 0;
3199         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3200         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3201         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3202
3203         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3204                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3205         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3206                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3207
3208         /* MEC1 */
3209         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3210                          mec_hdr->jt_offset);
3211         for (i = 0; i < mec_hdr->jt_size; i++)
3212                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3213                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3214
3215         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3216                         adev->gfx.mec_fw_version);
3217         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3218
3219         return 0;
3220 }
3221
3222 /* KIQ functions */
3223 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3224 {
3225         uint32_t tmp;
3226         struct amdgpu_device *adev = ring->adev;
3227
3228         /* tell RLC which queue is the KIQ */
3229         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3230         tmp &= 0xffffff00;
3231         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3232         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3233         tmp |= 0x80;
3234         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3235 }
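/*
 * The low byte of RLC_CP_SCHEDULERS written above encodes the KIQ's
 * location: queue in bits [2:0], pipe in bits [4:3] and ME in bits
 * [6:5], with bit 7 (the 0x80 OR-ed in by the second write) acting as
 * a valid/enable flag.  Writing the id first and setting the flag in a
 * second write presumably lets the RLC latch a complete queue id
 * before treating it as the KIQ.
 */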
3236
3237 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3238 {
3239         struct amdgpu_device *adev = ring->adev;
3240         struct v9_mqd *mqd = ring->mqd_ptr;
3241         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3242         uint32_t tmp;
3243
3244         mqd->header = 0xC0310800;
3245         mqd->compute_pipelinestat_enable = 0x00000001;
3246         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3247         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3248         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3249         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3250         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3251         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3252         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3253         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3254         mqd->compute_misc_reserved = 0x00000003;
3255
3256         mqd->dynamic_cu_mask_addr_lo =
3257                 lower_32_bits(ring->mqd_gpu_addr
3258                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3259         mqd->dynamic_cu_mask_addr_hi =
3260                 upper_32_bits(ring->mqd_gpu_addr
3261                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3262
3263         eop_base_addr = ring->eop_gpu_addr >> 8;
3264         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3265         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3266
3267         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3268         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3269         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3270                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3271
3272         mqd->cp_hqd_eop_control = tmp;
3273
3274         /* enable doorbell? */
3275         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3276
3277         if (ring->use_doorbell) {
3278                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3279                                     DOORBELL_OFFSET, ring->doorbell_index);
3280                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3281                                     DOORBELL_EN, 1);
3282                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3283                                     DOORBELL_SOURCE, 0);
3284                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3285                                     DOORBELL_HIT, 0);
3286         } else {
3287                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3288                                          DOORBELL_EN, 0);
3289         }
3290
3291         mqd->cp_hqd_pq_doorbell_control = tmp;
3292
3293         /* disable the queue if it's active */
3294         ring->wptr = 0;
3295         mqd->cp_hqd_dequeue_request = 0;
3296         mqd->cp_hqd_pq_rptr = 0;
3297         mqd->cp_hqd_pq_wptr_lo = 0;
3298         mqd->cp_hqd_pq_wptr_hi = 0;
3299
3300         /* set the pointer to the MQD */
3301         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3302         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3303
3304         /* set MQD vmid to 0 */
3305         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3306         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3307         mqd->cp_mqd_control = tmp;
3308
3309         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3310         hqd_gpu_addr = ring->gpu_addr >> 8;
3311         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3312         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3313
3314         /* set up the HQD, this is similar to CP_RB0_CNTL */
3315         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3316         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3317                             (order_base_2(ring->ring_size / 4) - 1));
3318         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3319                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3320 #ifdef __BIG_ENDIAN
3321         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3322 #endif
3323         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3324         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3325         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3326         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3327         mqd->cp_hqd_pq_control = tmp;
3328
3329         /* set the wb address whether it's enabled or not */
3330         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3331         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3332         mqd->cp_hqd_pq_rptr_report_addr_hi =
3333                 upper_32_bits(wb_gpu_addr) & 0xffff;
3334
3335         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3336         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3337         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3338         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3339
3340         tmp = 0;
3341         /* enable the doorbell if requested */
3342         if (ring->use_doorbell) {
3343                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3344                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3345                                 DOORBELL_OFFSET, ring->doorbell_index);
3346
3347                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3348                                          DOORBELL_EN, 1);
3349                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3350                                          DOORBELL_SOURCE, 0);
3351                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3352                                          DOORBELL_HIT, 0);
3353         }
3354
3355         mqd->cp_hqd_pq_doorbell_control = tmp;
3356
3357         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3358         ring->wptr = 0;
3359         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3360
3361         /* set the vmid for the queue */
3362         mqd->cp_hqd_vmid = 0;
3363
3364         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3365         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3366         mqd->cp_hqd_persistent_state = tmp;
3367
3368         /* set MIN_IB_AVAIL_SIZE */
3369         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3370         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3371         mqd->cp_hqd_ib_control = tmp;
3372
3373         /* the map_queues packet doesn't need to activate the queue,
3374          * so only the KIQ needs to set this field.
3375          */
3376         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3377                 mqd->cp_hqd_active = 1;
3378
3379         return 0;
3380 }
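/*
 * Worked example for the EOP sizing above: GFX9_MEC_HPD_SIZE is 4096
 * bytes, i.e. 1024 dwords, so EOP_SIZE = order_base_2(1024) - 1 = 9
 * and the hardware sees 2^(9 + 1) = 1024 dwords, exactly the size of
 * the buffer.  CP_HQD_PQ_CONTROL.QUEUE_SIZE is derived the same way
 * from ring_size / 4.
 */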
3381
3382 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3383 {
3384         struct amdgpu_device *adev = ring->adev;
3385         struct v9_mqd *mqd = ring->mqd_ptr;
3386         int j;
3387
3388         /* disable wptr polling */
3389         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3390
3391         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3392                mqd->cp_hqd_eop_base_addr_lo);
3393         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3394                mqd->cp_hqd_eop_base_addr_hi);
3395
3396         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3397         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3398                mqd->cp_hqd_eop_control);
3399
3400         /* enable doorbell? */
3401         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3402                mqd->cp_hqd_pq_doorbell_control);
3403
3404         /* disable the queue if it's active */
3405         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3406                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3407                 for (j = 0; j < adev->usec_timeout; j++) {
3408                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3409                                 break;
3410                         udelay(1);
3411                 }
3412                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3413                        mqd->cp_hqd_dequeue_request);
3414                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3415                        mqd->cp_hqd_pq_rptr);
3416                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3417                        mqd->cp_hqd_pq_wptr_lo);
3418                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3419                        mqd->cp_hqd_pq_wptr_hi);
3420         }
3421
3422         /* set the pointer to the MQD */
3423         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3424                mqd->cp_mqd_base_addr_lo);
3425         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3426                mqd->cp_mqd_base_addr_hi);
3427
3428         /* set MQD vmid to 0 */
3429         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3430                mqd->cp_mqd_control);
3431
3432         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3433         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3434                mqd->cp_hqd_pq_base_lo);
3435         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3436                mqd->cp_hqd_pq_base_hi);
3437
3438         /* set up the HQD, this is similar to CP_RB0_CNTL */
3439         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3440                mqd->cp_hqd_pq_control);
3441
3442         /* set the wb address whether it's enabled or not */
3443         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3444                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3445         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3446                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3447
3448         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3449         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3450                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3451         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3452                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3453
3454         /* enable the doorbell if requested */
3455         if (ring->use_doorbell) {
3456                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3457                                         (adev->doorbell_index.kiq * 2) << 2);
3458                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3459                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3460         }
3461
3462         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3463                mqd->cp_hqd_pq_doorbell_control);
3464
3465         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3466         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3467                mqd->cp_hqd_pq_wptr_lo);
3468         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3469                mqd->cp_hqd_pq_wptr_hi);
3470
3471         /* set the vmid for the queue */
3472         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3473
3474         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3475                mqd->cp_hqd_persistent_state);
3476
3477         /* activate the queue */
3478         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3479                mqd->cp_hqd_active);
3480
3481         if (ring->use_doorbell)
3482                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3483
3484         return 0;
3485 }
3486
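/*
 * Tear down the KIQ hardware queue state. If the HQD is still active we
 * request a dequeue and poll CP_HQD_ACTIVE for up to adev->usec_timeout
 * microseconds, force-clearing the active bit if the CP never acknowledges,
 * and then scrub the HQD registers back to an inactive state.
 */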
3487 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3488 {
3489         struct amdgpu_device *adev = ring->adev;
3490         int j;
3491
3492         /* disable the queue if it's active */
3493         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3494
3495                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3496
3497                 for (j = 0; j < adev->usec_timeout; j++) {
3498                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3499                                 break;
3500                         udelay(1);
3501                 }
3502
3503                 if (j == adev->usec_timeout) {
3504                         DRM_DEBUG("KIQ dequeue request failed.\n");
3505
3506                         /* Manual disable if dequeue request times out */
3507                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3508                 }
3509
3510                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0);
3512         }
3513
3514         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3515         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3516         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3517         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3518         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3519         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3520         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3521         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3522
3523         return 0;
3524 }
3525
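/*
 * (Re)initialize the KIQ. On a GPU reset the MQD is restored from the
 * CPU-side backup and only the queue registers are reprogrammed; on first
 * init the MQD is built from scratch and a backup copy is saved so later
 * resets can restore it cheaply. GRBM is steered to this queue's
 * me/pipe/queue (under srbm_mutex) while the registers are written.
 */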
3526 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3527 {
3528         struct amdgpu_device *adev = ring->adev;
3529         struct v9_mqd *mqd = ring->mqd_ptr;
3530         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3531
3532         gfx_v9_0_kiq_setting(ring);
3533
3534         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3535                 /* reset MQD to a clean status */
3536                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3537                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3538
3539                 /* reset ring buffer */
3540                 ring->wptr = 0;
3541                 amdgpu_ring_clear_ring(ring);
3542
3543                 mutex_lock(&adev->srbm_mutex);
3544                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3545                 gfx_v9_0_kiq_init_register(ring);
3546                 soc15_grbm_select(adev, 0, 0, 0, 0);
3547                 mutex_unlock(&adev->srbm_mutex);
3548         } else {
3549                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3550                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3551                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3552                 mutex_lock(&adev->srbm_mutex);
3553                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3554                 gfx_v9_0_mqd_init(ring);
3555                 gfx_v9_0_kiq_init_register(ring);
3556                 soc15_grbm_select(adev, 0, 0, 0, 0);
3557                 mutex_unlock(&adev->srbm_mutex);
3558
3559                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3560                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3561         }
3562
3563         return 0;
3564 }
3565
3566 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3567 {
3568         struct amdgpu_device *adev = ring->adev;
3569         struct v9_mqd *mqd = ring->mqd_ptr;
3570         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3571
3572         if (!adev->in_gpu_reset && !adev->in_suspend) {
3573                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3574                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3575                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3576                 mutex_lock(&adev->srbm_mutex);
3577                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3578                 gfx_v9_0_mqd_init(ring);
3579                 soc15_grbm_select(adev, 0, 0, 0, 0);
3580                 mutex_unlock(&adev->srbm_mutex);
3581
3582                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3583                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3584         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3585                 /* reset MQD to a clean status */
3586                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3587                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3588
3589                 /* reset ring buffer */
3590                 ring->wptr = 0;
3591                 amdgpu_ring_clear_ring(ring);
3592         } else {
3593                 amdgpu_ring_clear_ring(ring);
3594         }
3595
3596         return 0;
3597 }
3598
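/*
 * The MQD lives in a GPU buffer object, so it has to be reserved and
 * kmapped before gfx_v9_0_kiq_init_queue() can fill it in, then unmapped
 * and unreserved again; ring->mqd_ptr is only valid inside that window.
 */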
3599 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3600 {
3601         struct amdgpu_ring *ring;
3602         int r;
3603
3604         ring = &adev->gfx.kiq.ring;
3605
3606         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3607         if (unlikely(r != 0))
3608                 return r;
3609
3610         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3611         if (unlikely(r != 0)) {
                     amdgpu_bo_unreserve(ring->mqd_obj);
3612                 return r;
             }
3613
3614         gfx_v9_0_kiq_init_queue(ring);
3615         amdgpu_bo_kunmap(ring->mqd_obj);
3616         ring->mqd_ptr = NULL;
3617         amdgpu_bo_unreserve(ring->mqd_obj);
3618         ring->sched.ready = true;
3619         return 0;
3620 }
3621
3622 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3623 {
3624         struct amdgpu_ring *ring = NULL;
3625         int r = 0, i;
3626
3627         gfx_v9_0_cp_compute_enable(adev, true);
3628
3629         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3630                 ring = &adev->gfx.compute_ring[i];
3631
3632                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3633                 if (unlikely(r != 0))
3634                         goto done;
3635                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3636                 if (!r) {
3637                         r = gfx_v9_0_kcq_init_queue(ring);
3638                         amdgpu_bo_kunmap(ring->mqd_obj);
3639                         ring->mqd_ptr = NULL;
3640                 }
3641                 amdgpu_bo_unreserve(ring->mqd_obj);
3642                 if (r)
3643                         goto done;
3644         }
3645
3646         r = amdgpu_gfx_enable_kcq(adev);
3647 done:
3648         return r;
3649 }
3650
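/*
 * Bring-up order matters here: load the CP microcode (unless the PSP
 * already did), resume the KIQ first since the compute queues are mapped
 * through it, then the GFX ring (absent on Arcturus), then the KCQs, and
 * finally ring-test everything.
 */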
3651 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3652 {
3653         int r, i;
3654         struct amdgpu_ring *ring;
3655
3656         if (!(adev->flags & AMD_IS_APU))
3657                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3658
3659         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3660                 if (adev->asic_type != CHIP_ARCTURUS) {
3661                         /* legacy firmware loading */
3662                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3663                         if (r)
3664                                 return r;
3665                 }
3666
3667                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3668                 if (r)
3669                         return r;
3670         }
3671
3672         r = gfx_v9_0_kiq_resume(adev);
3673         if (r)
3674                 return r;
3675
3676         if (adev->asic_type != CHIP_ARCTURUS) {
3677                 r = gfx_v9_0_cp_gfx_resume(adev);
3678                 if (r)
3679                         return r;
3680         }
3681
3682         r = gfx_v9_0_kcq_resume(adev);
3683         if (r)
3684                 return r;
3685
3686         if (adev->asic_type != CHIP_ARCTURUS) {
3687                 ring = &adev->gfx.gfx_ring[0];
3688                 r = amdgpu_ring_test_helper(ring);
3689                 if (r)
3690                         return r;
3691         }
3692
3693         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3694                 ring = &adev->gfx.compute_ring[i];
3695                 amdgpu_ring_test_helper(ring);
3696         }
3697
3698         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3699
3700         return 0;
3701 }
3702
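/*
 * Arcturus only: mirror the data-fabric address hashing settings
 * (64K/2M/1G) into TCP_ADDR_CONFIG, presumably so the TCP decodes
 * addresses the same way the DF hashes them.
 */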
3703 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3704 {
3705         u32 tmp;
3706
3707         if (adev->asic_type != CHIP_ARCTURUS)
3708                 return;
3709
3710         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3711         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3712                                 adev->df.hash_status.hash_64k);
3713         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3714                                 adev->df.hash_status.hash_2m);
3715         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3716                                 adev->df.hash_status.hash_1g);
3717         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3718 }
3719
3720 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3721 {
3722         if (adev->asic_type != CHIP_ARCTURUS)
3723                 gfx_v9_0_cp_gfx_enable(adev, enable);
3724         gfx_v9_0_cp_compute_enable(adev, enable);
3725 }
3726
3727 static int gfx_v9_0_hw_init(void *handle)
3728 {
3729         int r;
3730         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3731
3732         if (!amdgpu_sriov_vf(adev))
3733                 gfx_v9_0_init_golden_registers(adev);
3734
3735         gfx_v9_0_constants_init(adev);
3736
3737         gfx_v9_0_init_tcp_config(adev);
3738
3739         r = adev->gfx.rlc.funcs->resume(adev);
3740         if (r)
3741                 return r;
3742
3743         r = gfx_v9_0_cp_resume(adev);
3744         if (r)
3745                 return r;
3746
3747         return r;
3748 }
3749
3750 static int gfx_v9_0_hw_fini(void *handle)
3751 {
3752         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3753
3754         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3755         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3756         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3757
3758         /* DF freeze and KCQ disable will fail if a RAS fatal error has triggered */
3759         if (!amdgpu_ras_intr_triggered())
3760                 /* disable KCQ to keep CPC from touching memory that is no longer valid */
3761                 amdgpu_gfx_disable_kcq(adev);
3762
3763         if (amdgpu_sriov_vf(adev)) {
3764                 gfx_v9_0_cp_gfx_enable(adev, false);
3765                 /* Polling must be disabled for SRIOV once the hw is done;
3766                  * otherwise the CPC engine may keep fetching the WB address,
3767                  * which is no longer valid after the sw side has finished,
3768                  * and trigger a DMAR read error on the hypervisor side.
3769                  */
3770                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3771                 return 0;
3772         }
3773
3774         /* Use the de-initialization sequence from CAIL when unbinding the
3775          * device from the driver; otherwise the KIQ hangs when binding back.
3776          */
3777         if (!adev->in_gpu_reset && !adev->in_suspend) {
3778                 mutex_lock(&adev->srbm_mutex);
3779                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3780                                 adev->gfx.kiq.ring.pipe,
3781                                 adev->gfx.kiq.ring.queue, 0);
3782                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3783                 soc15_grbm_select(adev, 0, 0, 0, 0);
3784                 mutex_unlock(&adev->srbm_mutex);
3785         }
3786
3787         gfx_v9_0_cp_enable(adev, false);
3788         adev->gfx.rlc.funcs->stop(adev);
3789
3790         return 0;
3791 }
3792
3793 static int gfx_v9_0_suspend(void *handle)
3794 {
3795         return gfx_v9_0_hw_fini(handle);
3796 }
3797
3798 static int gfx_v9_0_resume(void *handle)
3799 {
3800         return gfx_v9_0_hw_init(handle);
3801 }
3802
3803 static bool gfx_v9_0_is_idle(void *handle)
3804 {
3805         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3806
3807         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3808                                 GRBM_STATUS, GUI_ACTIVE))
3809                 return false;
3810         else
3811                 return true;
3812 }
3813
3814 static int gfx_v9_0_wait_for_idle(void *handle)
3815 {
3816         unsigned i;
3817         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3818
3819         for (i = 0; i < adev->usec_timeout; i++) {
3820                 if (gfx_v9_0_is_idle(handle))
3821                         return 0;
3822                 udelay(1);
3823         }
3824         return -ETIMEDOUT;
3825 }
3826
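/*
 * Build a GRBM_SOFT_RESET mask from the busy bits in GRBM_STATUS/STATUS2,
 * and if anything needs resetting, stop the RLC and CP first and then
 * pulse the reset bits with a short settling delay on either side.
 */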
3827 static int gfx_v9_0_soft_reset(void *handle)
3828 {
3829         u32 grbm_soft_reset = 0;
3830         u32 tmp;
3831         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3832
3833         /* GRBM_STATUS */
3834         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3835         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3836                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3837                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3838                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3839                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3840                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3841                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3842                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3843                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3844                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3845         }
3846
3847         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3848                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3849                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3850         }
3851
3852         /* GRBM_STATUS2 */
3853         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3854         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3855                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3856                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3857
3859         if (grbm_soft_reset) {
3860                 /* stop the rlc */
3861                 adev->gfx.rlc.funcs->stop(adev);
3862
3863                 if (adev->asic_type != CHIP_ARCTURUS)
3864                         /* Disable GFX parsing/prefetching */
3865                         gfx_v9_0_cp_gfx_enable(adev, false);
3866
3867                 /* Disable MEC parsing/prefetching */
3868                 gfx_v9_0_cp_compute_enable(adev, false);
3869
3871                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3872                 tmp |= grbm_soft_reset;
3873                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3874                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3875                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3876
3877                 udelay(50);
3878
3879                 tmp &= ~grbm_soft_reset;
3880                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3881                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3883
3884                 /* Wait a little for things to settle down */
3885                 udelay(50);
3886         }
3887         return 0;
3888 }
3889
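/*
 * On Vega10 under SRIOV the 64-bit refclock timestamp is read with the
 * classic MSB/LSB/MSB pattern: read the high half, then the low half,
 * then the high half again, and retry if the two high-half reads differ
 * (meaning the low half wrapped in between).
 */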
3890 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3891 {
3892         uint64_t clock;
3893
3894         mutex_lock(&adev->gfx.gpu_clock_mutex);
3895         if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
3896                 uint32_t tmp, lsb, msb, i = 0;
3897                 do {
3898                         if (i != 0)
3899                                 udelay(1);
3900                         tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3901                         lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
3902                         msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3903                         i++;
3904                 } while (unlikely(tmp != msb) && (i < adev->usec_timeout));
3905                 clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
3906         } else {
3907                 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3908                 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3909                         ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3910         }
3911         mutex_unlock(&adev->gfx.gpu_clock_mutex);
3912         return clock;
3913 }
3914
3915 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3916                                           uint32_t vmid,
3917                                           uint32_t gds_base, uint32_t gds_size,
3918                                           uint32_t gws_base, uint32_t gws_size,
3919                                           uint32_t oa_base, uint32_t oa_size)
3920 {
3921         struct amdgpu_device *adev = ring->adev;
3922
3923         /* GDS Base */
3924         gfx_v9_0_write_data_to_reg(ring, 0, false,
3925                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3926                                    gds_base);
3927
3928         /* GDS Size */
3929         gfx_v9_0_write_data_to_reg(ring, 0, false,
3930                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3931                                    gds_size);
3932
3933         /* GWS */
3934         gfx_v9_0_write_data_to_reg(ring, 0, false,
3935                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3936                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3937
3938         /* OA: a contiguous mask of oa_size bits starting at bit oa_base */
3939         gfx_v9_0_write_data_to_reg(ring, 0, false,
3940                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3941                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
3942 }
3943
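/*
 * Hand-assembled GFX9 compute shaders, stored as raw dwords, which are
 * dispatched by gfx_v9_0_do_edc_gpr_workarounds() below to initialize the
 * vector and scalar general-purpose registers.
 */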
3944 static const u32 vgpr_init_compute_shader[] =
3945 {
3946         0xb07c0000, 0xbe8000ff,
3947         0x000000f8, 0xbf110800,
3948         0x7e000280, 0x7e020280,
3949         0x7e040280, 0x7e060280,
3950         0x7e080280, 0x7e0a0280,
3951         0x7e0c0280, 0x7e0e0280,
3952         0x80808800, 0xbe803200,
3953         0xbf84fff5, 0xbf9c0000,
3954         0xd28c0001, 0x0001007f,
3955         0xd28d0001, 0x0002027e,
3956         0x10020288, 0xb8810904,
3957         0xb7814000, 0xd1196a01,
3958         0x00000301, 0xbe800087,
3959         0xbefc00c1, 0xd89c4000,
3960         0x00020201, 0xd89cc080,
3961         0x00040401, 0x320202ff,
3962         0x00000800, 0x80808100,
3963         0xbf84fff8, 0x7e020280,
3964         0xbf810000, 0x00000000,
3965 };
3966
3967 static const u32 sgpr_init_compute_shader[] =
3968 {
3969         0xb07c0000, 0xbe8000ff,
3970         0x0000005f, 0xbee50080,
3971         0xbe812c65, 0xbe822c65,
3972         0xbe832c65, 0xbe842c65,
3973         0xbe852c65, 0xb77c0005,
3974         0x80808500, 0xbf84fff8,
3975         0xbe800080, 0xbf810000,
3976 };
3977
3978 /* When the register arrays below are changed, please update gpr_reg_size
3979  * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
3980  * so that all gfx9 ASICs are covered. */
3981 static const struct soc15_reg_entry vgpr_init_regs[] = {
3982    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
3983    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
3984    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
3985    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3986    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
3987    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3988    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3989    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3990    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3991    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3992    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
3993    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
3994    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
3995    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
3996 };
3997
3998 static const struct soc15_reg_entry sgpr1_init_regs[] = {
3999    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4000    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4001    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4002    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4003    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4004    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4005    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4006    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4007    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4008    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4009    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4010    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4011    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4012    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4013 };
4014
4015 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4016    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4017    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4018    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4019    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4020    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4021    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4022    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4023    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4024    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4025    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4026    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4027    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4028    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4029    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4030 };
4031
4032 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4033    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4034    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4035    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4036    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4037    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4038    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4039    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4040    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4041    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4042    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4043    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4044    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4045    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4046    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4047    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4048    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4049    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4050    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4051    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4052    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4053    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4054    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4055    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4056    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4057    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4058    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4059    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4060    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4061    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4062    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4063    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4064    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4065    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4066    { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1},
4067 };
4068
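/*
 * GDS ECC workaround: map the whole of GDS into VMID0 and issue a
 * DMA_DATA packet targeting GDS so that every location gets written once.
 * Completion is detected by polling the ring's read pointer rather than
 * waiting on a fence.
 */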
4069 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4070 {
4071         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4072         int i, r;
4073
4074         /* only supported when RAS is enabled */
4075         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4076                 return 0;
4077
4078         r = amdgpu_ring_alloc(ring, 7);
4079         if (r) {
4080                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4081                         ring->name, r);
4082                 return r;
4083         }
4084
4085         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4086         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4087
4088         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4089         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4090                                 PACKET3_DMA_DATA_DST_SEL(1) |
4091                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4092                                 PACKET3_DMA_DATA_ENGINE(0)));
4093         amdgpu_ring_write(ring, 0);
4094         amdgpu_ring_write(ring, 0);
4095         amdgpu_ring_write(ring, 0);
4096         amdgpu_ring_write(ring, 0);
4097         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4098                                 adev->gds.gds_size);
4099
4100         amdgpu_ring_commit(ring);
4101
4102         for (i = 0; i < adev->usec_timeout; i++) {
4103                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4104                         break;
4105                 udelay(1);
4106         }
4107
4108         if (i >= adev->usec_timeout)
4109                 r = -ETIMEDOUT;
4110
4111         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4112
4113         return r;
4114 }
4115
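/*
 * GPR ECC workaround: build a single IB containing three compute
 * dispatches - one with the VGPR init shader and two with the SGPR init
 * shader, each SGPR pass enabling a different set of CUs via the
 * STATIC_THREAD_MGMT masks - then read back every SEC/DED counter
 * register to clear the error counts.
 */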
4116 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4117 {
4118         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4119         struct amdgpu_ib ib;
4120         struct dma_fence *f = NULL;
4121         int r, i, j, k;
4122         unsigned total_size, vgpr_offset, sgpr_offset;
4123         u64 gpu_addr;
4124
4125         int compute_dim_x = adev->gfx.config.max_shader_engines *
4126                                                 adev->gfx.config.max_cu_per_sh *
4127                                                 adev->gfx.config.max_sh_per_se;
4128         int sgpr_work_group_size = 5;
4129         int gpr_reg_size = compute_dim_x / 16 + 6;
4130
4131         /* only supported when RAS is enabled */
4132         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4133                 return 0;
4134
4135         /* bail if the compute ring is not ready */
4136         if (!ring->sched.ready)
4137                 return 0;
4138
4139         total_size =
4140                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4141         total_size +=
4142                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4143         total_size +=
4144                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4145         total_size = ALIGN(total_size, 256);
4146         vgpr_offset = total_size;
4147         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4148         sgpr_offset = total_size;
4149         total_size += sizeof(sgpr_init_compute_shader);
4150
4151         /* allocate an indirect buffer to put the commands in */
4152         memset(&ib, 0, sizeof(ib));
4153         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4154         if (r) {
4155                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4156                 return r;
4157         }
4158
4159         /* load the compute shaders */
4160         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4161                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4162
4163         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4164                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4165
4166         /* init the ib length to 0 */
4167         ib.length_dw = 0;
4168
4169         /* VGPR */
4170         /* write the register state for the compute dispatch */
4171         for (i = 0; i < gpr_reg_size; i++) {
4172                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4173                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4174                                                                 - PACKET3_SET_SH_REG_START;
4175                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4176         }
4177         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4178         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4179         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4180         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4181                                                         - PACKET3_SET_SH_REG_START;
4182         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4183         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4184
4185         /* write dispatch packet */
4186         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4187         ib.ptr[ib.length_dw++] = compute_dim_x; /* x */
4188         ib.ptr[ib.length_dw++] = 1; /* y */
4189         ib.ptr[ib.length_dw++] = 1; /* z */
4190         ib.ptr[ib.length_dw++] =
4191                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4192
4193         /* write CS partial flush packet */
4194         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4195         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4196
4197         /* SGPR1 */
4198         /* write the register state for the compute dispatch */
4199         for (i = 0; i < gpr_reg_size; i++) {
4200                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4201                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4202                                                                 - PACKET3_SET_SH_REG_START;
4203                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4204         }
4205         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4206         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4207         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4208         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4209                                                         - PACKET3_SET_SH_REG_START;
4210         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4211         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4212
4213         /* write dispatch packet */
4214         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4215         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4216         ib.ptr[ib.length_dw++] = 1; /* y */
4217         ib.ptr[ib.length_dw++] = 1; /* z */
4218         ib.ptr[ib.length_dw++] =
4219                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4220
4221         /* write CS partial flush packet */
4222         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4223         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4224
4225         /* SGPR2 */
4226         /* write the register state for the compute dispatch */
4227         for (i = 0; i < gpr_reg_size; i++) {
4228                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4229                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4230                                                                 - PACKET3_SET_SH_REG_START;
4231                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4232         }
4233         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4234         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4235         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4236         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4237                                                         - PACKET3_SET_SH_REG_START;
4238         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4239         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4240
4241         /* write dispatch packet */
4242         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4243         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4244         ib.ptr[ib.length_dw++] = 1; /* y */
4245         ib.ptr[ib.length_dw++] = 1; /* z */
4246         ib.ptr[ib.length_dw++] =
4247                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4248
4249         /* write CS partial flush packet */
4250         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4251         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4252
4253         /* schedule the IB on the ring */
4254         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4255         if (r) {
4256                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4257                 goto fail;
4258         }
4259
4260         /* wait for the GPU to finish processing the IB */
4261         r = dma_fence_wait(f, false);
4262         if (r) {
4263                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4264                 goto fail;
4265         }
4266
4267         /* read back registers to clear the counters */
4268         mutex_lock(&adev->grbm_idx_mutex);
4269         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4270                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4271                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4272                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4273                                 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4274                         }
4275                 }
4276         }
4277         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4278         mutex_unlock(&adev->grbm_idx_mutex);
4279
4280 fail:
4281         amdgpu_ib_free(adev, &ib, NULL);
4282         dma_fence_put(f);
4283
4284         return r;
4285 }
4286
4287 static int gfx_v9_0_early_init(void *handle)
4288 {
4289         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4290
4291         if (adev->asic_type == CHIP_ARCTURUS)
4292                 adev->gfx.num_gfx_rings = 0;
4293         else
4294                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4295         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4296         gfx_v9_0_set_kiq_pm4_funcs(adev);
4297         gfx_v9_0_set_ring_funcs(adev);
4298         gfx_v9_0_set_irq_funcs(adev);
4299         gfx_v9_0_set_gds_init(adev);
4300         gfx_v9_0_set_rlc_funcs(adev);
4301
4302         return 0;
4303 }
4304
4305 static int gfx_v9_0_ecc_late_init(void *handle)
4306 {
4307         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4308         int r;
4309
4310         r = gfx_v9_0_do_edc_gds_workarounds(adev);
4311         if (r)
4312                 return r;
4313
4314         /* requires IBs so do in late init after IB pool is initialized */
4315         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4316         if (r)
4317                 return r;
4318
4319         r = amdgpu_gfx_ras_late_init(adev);
4320         if (r)
4321                 return r;
4322
4323         return 0;
4324 }
4325
4326 static int gfx_v9_0_late_init(void *handle)
4327 {
4328         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4329         int r;
4330
4331         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4332         if (r)
4333                 return r;
4334
4335         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4336         if (r)
4337                 return r;
4338
4339         r = gfx_v9_0_ecc_late_init(handle);
4340         if (r)
4341                 return r;
4342
4343         return 0;
4344 }
4345
4346 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4347 {
4348         uint32_t rlc_setting;
4349
4350         /* if RLC is not enabled, do nothing */
4351         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4352         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4353                 return false;
4354
4355         return true;
4356 }
4357
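/*
 * RLC safe-mode handshake: write RLC_SAFE_MODE with CMD set and
 * MESSAGE = 1 to request entry, then poll until the RLC firmware
 * acknowledges by clearing the CMD bit. Exit (below) writes CMD with
 * MESSAGE = 0 and does not wait.
 */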
4358 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4359 {
4360         uint32_t data;
4361         unsigned i;
4362
4363         data = RLC_SAFE_MODE__CMD_MASK;
4364         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4365         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4366
4367         /* wait for RLC_SAFE_MODE */
4368         for (i = 0; i < adev->usec_timeout; i++) {
4369                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4370                         break;
4371                 udelay(1);
4372         }
4373 }
4374
4375 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4376 {
4377         uint32_t data;
4378
4379         data = RLC_SAFE_MODE__CMD_MASK;
4380         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4381 }
4382
4383 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4384                                                 bool enable)
4385 {
4386         amdgpu_gfx_rlc_enter_safe_mode(adev);
4387
4388         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4389                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4390                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4391                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4392         } else {
4393                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4394                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4395                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4396         }
4397
4398         amdgpu_gfx_rlc_exit_safe_mode(adev);
4399 }
4400
4401 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4402                                                 bool enable)
4403 {
4404         /* TODO: double check if we need to perform under safe mode */
4405         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4406
4407         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4408                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4409         else
4410                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4411
4412         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4413                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4414         else
4415                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4416
4417         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4418 }
4419
4420 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4421                                                       bool enable)
4422 {
4423         uint32_t data, def;
4424
4425         amdgpu_gfx_rlc_enter_safe_mode(adev);
4426
4427         /* It is disabled by HW by default */
4428         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4429                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4430                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4431
4432                 if (adev->asic_type != CHIP_VEGA12)
4433                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4434
4435                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4436                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4437                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4438
4439                 /* only for Vega10 & Raven1 */
4440                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4441
4442                 if (def != data)
4443                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4444
4445                 /* MGLS is a global flag to control all MGLS in GFX */
4446                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4447                         /* 2 - RLC memory Light sleep */
4448                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4449                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4450                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4451                                 if (def != data)
4452                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4453                         }
4454                         /* 3 - CP memory Light sleep */
4455                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4456                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4457                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4458                                 if (def != data)
4459                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4460                         }
4461                 }
4462         } else {
4463                 /* 1 - MGCG_OVERRIDE */
4464                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4465
4466                 if (adev->asic_type != CHIP_VEGA12)
4467                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4468
4469                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4470                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4471                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4472                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4473
4474                 if (def != data)
4475                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4476
4477                 /* 2 - disable MGLS in RLC */
4478                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4479                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4480                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4481                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4482                 }
4483
4484                 /* 3 - disable MGLS in CP */
4485                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4486                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4487                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4488                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4489                 }
4490         }
4491
4492         amdgpu_gfx_rlc_exit_safe_mode(adev);
4493 }
4494
4495 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4496                                            bool enable)
4497 {
4498         uint32_t data, def;
4499
4500         if (adev->asic_type == CHIP_ARCTURUS)
4501                 return;
4502
4503         amdgpu_gfx_rlc_enter_safe_mode(adev);
4504
4505         /* Enable 3D CGCG/CGLS */
4506         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4507                 /* write cmd to clear cgcg/cgls ov */
4508                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4509                 /* unset CGCG override */
4510                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4511                 /* update CGCG and CGLS override bits */
4512                 if (def != data)
4513                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4514
4515                 /* enable 3Dcgcg FSM(0x0000363f) */
4516                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4517
4518                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4519                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4520                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4521                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4522                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4523                 if (def != data)
4524                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4525
4526                 /* set IDLE_POLL_COUNT(0x00900100) */
4527                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4528                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4529                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4530                 if (def != data)
4531                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4532         } else {
4533                 /* Disable CGCG/CGLS */
4534                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4535                 /* disable cgcg, cgls should be disabled */
4536                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4537                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4538                 /* disable cgcg and cgls in FSM */
4539                 if (def != data)
4540                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4541         }
4542
4543         amdgpu_gfx_rlc_exit_safe_mode(adev);
4544 }
4545
4546 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4547                                                       bool enable)
4548 {
4549         uint32_t def, data;
4550
4551         amdgpu_gfx_rlc_enter_safe_mode(adev);
4552
4553         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4554                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4555                 /* unset CGCG override */
4556                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4557                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4558                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4559                 else
4560                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4561                 /* update CGCG and CGLS override bits */
4562                 if (def != data)
4563                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4564
4565                 /* enable cgcg FSM(0x0000363F) */
4566                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4567
4568                 if (adev->asic_type == CHIP_ARCTURUS)
4569                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4570                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4571                 else
4572                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4573                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4574                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4575                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4576                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4577                 if (def != data)
4578                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4579
4580                 /* set IDLE_POLL_COUNT(0x00900100) */
4581                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4582                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4583                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4584                 if (def != data)
4585                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4586         } else {
4587                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4588                 /* reset CGCG/CGLS bits */
4589                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4590                 /* disable cgcg and cgls in FSM */
4591                 if (def != data)
4592                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4593         }
4594
4595         amdgpu_gfx_rlc_exit_safe_mode(adev);
4596 }
4597
4598 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4599                                             bool enable)
4600 {
4601         if (enable) {
4602                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4603                  * ===  MGCG + MGLS ===
4604                  */
4605                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4606                 /* ===  CGCG /CGLS for GFX 3D Only === */
4607                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4608                 /* ===  CGCG + CGLS === */
4609                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4610         } else {
4611                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4612                  * ===  CGCG + CGLS ===
4613                  */
4614                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4615                 /* ===  CGCG /CGLS for GFX 3D Only === */
4616                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4617                 /* ===  MGCG + MGLS === */
4618                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4619         }
4620         return 0;
4621 }
4622
4623 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4624         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4625         .set_safe_mode = gfx_v9_0_set_safe_mode,
4626         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4627         .init = gfx_v9_0_rlc_init,
4628         .get_csb_size = gfx_v9_0_get_csb_size,
4629         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4630         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4631         .resume = gfx_v9_0_rlc_resume,
4632         .stop = gfx_v9_0_rlc_stop,
4633         .reset = gfx_v9_0_rlc_reset,
4634         .start = gfx_v9_0_rlc_start
4635 };
4636
4637 static int gfx_v9_0_set_powergating_state(void *handle,
4638                                           enum amd_powergating_state state)
4639 {
4640         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4641         bool enable = (state == AMD_PG_STATE_GATE);
4642
4643         switch (adev->asic_type) {
4644         case CHIP_RAVEN:
4645         case CHIP_RENOIR:
4646                 if (!enable) {
4647                         amdgpu_gfx_off_ctrl(adev, false);
4648                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4649                 }
4650                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4651                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4652                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4653                 } else {
4654                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4655                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4656                 }
4657
4658                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4659                         gfx_v9_0_enable_cp_power_gating(adev, true);
4660                 else
4661                         gfx_v9_0_enable_cp_power_gating(adev, false);
4662
4663                 /* update gfx cgpg state */
4664                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4665
4666                 /* update mgcg state */
4667                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4668
4669                 if (enable)
4670                         amdgpu_gfx_off_ctrl(adev, true);
4671                 break;
4672         case CHIP_VEGA12:
4673                 if (!enable) {
4674                         amdgpu_gfx_off_ctrl(adev, false);
4675                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4676                 } else {
4677                         amdgpu_gfx_off_ctrl(adev, true);
4678                 }
4679                 break;
4680         default:
4681                 break;
4682         }
4683
4684         return 0;
4685 }
4686
4687 static int gfx_v9_0_set_clockgating_state(void *handle,
4688                                           enum amd_clockgating_state state)
4689 {
4690         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4691
4692         if (amdgpu_sriov_vf(adev))
4693                 return 0;
4694
4695         switch (adev->asic_type) {
4696         case CHIP_VEGA10:
4697         case CHIP_VEGA12:
4698         case CHIP_VEGA20:
4699         case CHIP_RAVEN:
4700         case CHIP_ARCTURUS:
4701         case CHIP_RENOIR:
4702                 gfx_v9_0_update_gfx_clock_gating(adev,
4703                                                  state == AMD_CG_STATE_GATE);
4704                 break;
4705         default:
4706                 break;
4707         }
4708         return 0;
4709 }
4710
4711 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4712 {
4713         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4714         int data;
4715
4716         if (amdgpu_sriov_vf(adev))
4717                 *flags = 0;
4718
4719         /* AMD_CG_SUPPORT_GFX_MGCG */
4720         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4721         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4722                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4723
4724         /* AMD_CG_SUPPORT_GFX_CGCG */
4725         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4726         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4727                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4728
4729         /* AMD_CG_SUPPORT_GFX_CGLS */
4730         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4731                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4732
4733         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4734         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4735         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4736                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4737
4738         /* AMD_CG_SUPPORT_GFX_CP_LS */
4739         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4740         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4741                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4742
4743         if (adev->asic_type != CHIP_ARCTURUS) {
4744                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4745                 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4746                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4747                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4748
4749                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4750                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4751                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4752         }
4753 }
4754
4755 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4756 {
4757         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4758 }
4759
4760 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4761 {
4762         struct amdgpu_device *adev = ring->adev;
4763         u64 wptr;
4764
4765         /* XXX check if swapping is necessary on BE */
4766         if (ring->use_doorbell) {
4767                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4768         } else {
4769                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4770                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4771         }
4772
4773         return wptr;
4774 }
4775
4776 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4777 {
4778         struct amdgpu_device *adev = ring->adev;
4779
4780         if (ring->use_doorbell) {
4781                 /* XXX check if swapping is necessary on BE */
4782                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4783                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4784         } else {
4785                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4786                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4787         }
4788 }
4789
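/*
 * HDP flush via register handshake: the ring's ref_and_mask bit is written
 * to the NBIO hdp_flush request register and WAIT_REG_MEM then polls the
 * done register until that bit reads back.  Compute rings derive the mask
 * from me/pipe; the gfx ring uses CP0 with the PFP engine doing the wait.
 */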
4790 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4791 {
4792         struct amdgpu_device *adev = ring->adev;
4793         u32 ref_and_mask, reg_mem_engine;
4794         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4795
4796         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4797                 switch (ring->me) {
4798                 case 1:
4799                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4800                         break;
4801                 case 2:
4802                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4803                         break;
4804                 default:
4805                         return;
4806                 }
4807                 reg_mem_engine = 0;
4808         } else {
4809                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4810                 reg_mem_engine = 1; /* pfp */
4811         }
4812
4813         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4814                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4815                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4816                               ref_and_mask, ref_and_mask, 0x20);
4817 }
4818
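/*
 * Gfx IB dispatch: the header selects the constant (CE) or DE engine, then
 * comes the dword-aligned 64-bit IB address and a control word packing
 * length_dw in the low bits and vmid in bits [27:24].  Under SR-IOV,
 * preemptible DE IBs are marked PRE_ENB and preceded by the DE metadata.
 */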
4819 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4820                                         struct amdgpu_job *job,
4821                                         struct amdgpu_ib *ib,
4822                                         uint32_t flags)
4823 {
4824         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4825         u32 header, control = 0;
4826
4827         if (ib->flags & AMDGPU_IB_FLAG_CE)
4828                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4829         else
4830                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4831
4832         control |= ib->length_dw | (vmid << 24);
4833
4834         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4835                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4836
4837                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4838                         gfx_v9_0_ring_emit_de_meta(ring);
4839         }
4840
4841         amdgpu_ring_write(ring, header);
4842         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4843         amdgpu_ring_write(ring,
4844 #ifdef __BIG_ENDIAN
4845                 (2 << 0) |
4846 #endif
4847                 lower_32_bits(ib->gpu_addr));
4848         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4849         amdgpu_ring_write(ring, control);
4850 }
4851
4852 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4853                                           struct amdgpu_job *job,
4854                                           struct amdgpu_ib *ib,
4855                                           uint32_t flags)
4856 {
4857         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4858         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4859
4860         /* Currently, there is a high probability of a wave ID mismatch
4861          * between ME and GDS, leading to a hw deadlock, because ME generates
4862          * different wave IDs than the GDS expects. This situation happens
4863          * randomly when at least 5 compute pipes use GDS ordered append.
4864          * The wave IDs generated by ME are also wrong after suspend/resume.
4865          * Those are probably bugs somewhere else in the kernel driver.
4866          *
4867          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4868          * GDS to 0 for this ring (me/pipe).
4869          */
4870         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4871                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4872                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4873                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4874         }
4875
4876         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4877         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4878         amdgpu_ring_write(ring,
4879 #ifdef __BIG_ENDIAN
4880                                 (2 << 0) |
4881 #endif
4882                                 lower_32_bits(ib->gpu_addr));
4883         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4884         amdgpu_ring_write(ring, control);
4885 }
4886
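/*
 * The fence below is an 8-dword RELEASE_MEM: header, event/cache-action
 * control, DATA_SEL/INT_SEL, 64-bit address, 64-bit sequence number and a
 * trailing reserved dword.  DATA_SEL 2 writes the full 64-bit seq, 1 only
 * the low 32 bits; INT_SEL 2 additionally raises the EOP interrupt.
 */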
4887 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4888                                      u64 seq, unsigned flags)
4889 {
4890         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4891         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4892         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4893
4894         /* RELEASE_MEM - flush caches, send int */
4895         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4896         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4897                                                EOP_TC_NC_ACTION_EN) :
4898                                               (EOP_TCL1_ACTION_EN |
4899                                                EOP_TC_ACTION_EN |
4900                                                EOP_TC_WB_ACTION_EN |
4901                                                EOP_TC_MD_ACTION_EN)) |
4902                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4903                                  EVENT_INDEX(5)));
4904         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4905
4906         /*
4907          * the address should be Qword aligned for a 64bit write, and Dword
4908          * aligned if we only send the 32bit data low (data high is discarded)
4909          */
4910         if (write64bit)
4911                 BUG_ON(addr & 0x7);
4912         else
4913                 BUG_ON(addr & 0x3);
4914         amdgpu_ring_write(ring, lower_32_bits(addr));
4915         amdgpu_ring_write(ring, upper_32_bits(addr));
4916         amdgpu_ring_write(ring, lower_32_bits(seq));
4917         amdgpu_ring_write(ring, upper_32_bits(seq));
4918         amdgpu_ring_write(ring, 0);
4919 }
4920
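/*
 * Pipeline sync: WAIT_REG_MEM on the ring's own fence address until the
 * most recently emitted sequence number (sync_seq) lands there; the gfx
 * ring waits on the PFP engine, compute rings wait on ME.
 */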
4921 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4922 {
4923         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4924         uint32_t seq = ring->fence_drv.sync_seq;
4925         uint64_t addr = ring->fence_drv.gpu_addr;
4926
4927         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4928                               lower_32_bits(addr), upper_32_bits(addr),
4929                               seq, 0xffffffff, 4);
4930 }
4931
4932 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4933                                         unsigned vmid, uint64_t pd_addr)
4934 {
4935         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4936
4937         /* compute doesn't have PFP */
4938         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4939                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4940                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4941                 amdgpu_ring_write(ring, 0x0);
4942         }
4943 }
4944
4945 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4946 {
4947         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4948 }
4949
4950 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4951 {
4952         u64 wptr;
4953
4954         /* XXX check if swapping is necessary on BE */
4955         if (ring->use_doorbell)
4956                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4957         else
4958                 BUG();
4959         return wptr;
4960 }
4961
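/*
 * The SPI_WCL_PIPE_PERCENT_* registers are assumed contiguous, so the
 * target register is base + flat pipe index.  me0 contributes only the
 * GFX and HP3D entries, hence pipe_num is reduced by 2 for higher MEs.
 */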
4962 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4963                                            bool acquire)
4964 {
4965         struct amdgpu_device *adev = ring->adev;
4966         int pipe_num, tmp, reg;
4967         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4968
4969         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4970
4971         /* first me only has 2 entries, GFX and HP3D */
4972         if (ring->me > 0)
4973                 pipe_num -= 2;
4974
4975         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4976         tmp = RREG32(reg);
4977         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4978         WREG32(reg, tmp);
4979 }
4980
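/*
 * High-priority pipe reservation: reserved me/pipe pairs are tracked in
 * pipe_reserve_bitmap.  An empty bitmap restores full SPI pipe percent to
 * every ring; otherwise only rings whose bit is set keep full throughput
 * and the remainder are throttled to the 0x1 minimum.
 */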
4981 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4982                                             struct amdgpu_ring *ring,
4983                                             bool acquire)
4984 {
4985         int i, pipe;
4986         bool reserve;
4987         struct amdgpu_ring *iring;
4988
4989         mutex_lock(&adev->gfx.pipe_reserve_mutex);
4990         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4991         if (acquire)
4992                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4993         else
4994                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4995
4996         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4997                 /* Clear all reservations - everyone reacquires all resources */
4998                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4999                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5000                                                        true);
5001
5002                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5003                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5004                                                        true);
5005         } else {
5006                 /* Lower all pipes without a current reservation */
5007                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5008                         iring = &adev->gfx.gfx_ring[i];
5009                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5010                                                            iring->me,
5011                                                            iring->pipe,
5012                                                            0);
5013                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5014                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5015                 }
5016
5017                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5018                         iring = &adev->gfx.compute_ring[i];
5019                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5020                                                            iring->me,
5021                                                            iring->pipe,
5022                                                            0);
5023                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5024                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5025                 }
5026         }
5027
5028         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5029 }
5030
5031 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5032                                       struct amdgpu_ring *ring,
5033                                       bool acquire)
5034 {
5035         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5036         uint32_t queue_priority = acquire ? 0xf : 0x0;
5037
5038         mutex_lock(&adev->srbm_mutex);
5039         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5040
5041         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5042         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5043
5044         soc15_grbm_select(adev, 0, 0, 0, 0);
5045         mutex_unlock(&adev->srbm_mutex);
5046 }
5047
5048 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5049                                                enum drm_sched_priority priority)
5050 {
5051         struct amdgpu_device *adev = ring->adev;
5052         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5053
5054         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5055                 return;
5056
5057         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5058         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5059 }
5060
5061 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5062 {
5063         struct amdgpu_device *adev = ring->adev;
5064
5065         /* XXX check if swapping is necessary on BE */
5066         if (ring->use_doorbell) {
5067                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5068                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5069         } else {
5070                 BUG(); /* only DOORBELL method supported on gfx9 now */
5071         }
5072 }
5073
5074 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5075                                          u64 seq, unsigned int flags)
5076 {
5077         struct amdgpu_device *adev = ring->adev;
5078
5079         /* we only allocate 32bit for each seq wb address */
5080         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5081
5082         /* write fence seq to the "addr" */
5083         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5084         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5085                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5086         amdgpu_ring_write(ring, lower_32_bits(addr));
5087         amdgpu_ring_write(ring, upper_32_bits(addr));
5088         amdgpu_ring_write(ring, lower_32_bits(seq));
5089
5090         if (flags & AMDGPU_FENCE_FLAG_INT) {
5091                 /* set register to trigger INT */
5092                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5093                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5094                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5095                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5096                 amdgpu_ring_write(ring, 0);
5097                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5098         }
5099 }
5100
5101 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5102 {
5103         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5104         amdgpu_ring_write(ring, 0);
5105 }
5106
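/*
 * PACKET3 count arithmetic for the CE/DE metadata writes below: the count
 * field is <body dwords> - 1 and WRITE_DATA has three leading body dwords
 * (control, addr_lo, addr_hi), so cnt = payload_dw + 3 - 1, i.e.
 * (sizeof(payload) >> 2) + 4 - 2.
 */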
5107 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5108 {
5109         struct v9_ce_ib_state ce_payload = {0};
5110         uint64_t csa_addr;
5111         int cnt;
5112
5113         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5114         csa_addr = amdgpu_csa_vaddr(ring->adev);
5115
5116         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5117         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5118                                  WRITE_DATA_DST_SEL(8) |
5119                                  WR_CONFIRM) |
5120                                  WRITE_DATA_CACHE_POLICY(0));
5121         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5122         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5123         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5124 }
5125
5126 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5127 {
5128         struct v9_de_ib_state de_payload = {0};
5129         uint64_t csa_addr, gds_addr;
5130         int cnt;
5131
5132         csa_addr = amdgpu_csa_vaddr(ring->adev);
5133         gds_addr = csa_addr + 4096;
5134         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5135         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5136
5137         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5138         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5139         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5140                                  WRITE_DATA_DST_SEL(8) |
5141                                  WR_CONFIRM) |
5142                                  WRITE_DATA_CACHE_POLICY(0));
5143         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5144         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5145         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5146 }
5147
5148 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5149 {
5150         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5151         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0: frame_start, 1: frame_end */
5152 }
5153
5154 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5155 {
5156         uint32_t dw2 = 0;
5157
5158         if (amdgpu_sriov_vf(ring->adev))
5159                 gfx_v9_0_ring_emit_ce_meta(ring);
5160
5161         gfx_v9_0_ring_emit_tmz(ring, true);
5162
5163         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5164         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5165                 /* set load_global_config & load_global_uconfig */
5166                 dw2 |= 0x8001;
5167                 /* set load_cs_sh_regs */
5168                 dw2 |= 0x01000000;
5169                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5170                 dw2 |= 0x10002;
5171
5172                 /* set load_ce_ram if preamble presented */
5173                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5174                         dw2 |= 0x10000000;
5175         } else {
5176                 /* still load_ce_ram if this is the first time the preamble is
5177                  * presented, even though no context switch happens.
5178                  */
5179                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5180                         dw2 |= 0x10000000;
5181         }
5182
5183         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5184         amdgpu_ring_write(ring, dw2);
5185         amdgpu_ring_write(ring, 0);
5186 }
5187
5188 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5189 {
5190         unsigned ret;
5191         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5192         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5193         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5194         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
5195         ret = ring->wptr & ring->buf_mask;
5196         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5197         return ret;
5198 }
5199
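/*
 * Worked example for the patch below (hypothetical sizes): with a 64-dword
 * ring (buf_mask 0x3f), a placeholder written at offset 60 and wptr since
 * wrapped around to 4, cur = 3; cur < offset, so the patched skip count is
 * (ring_size >> 2) - offset + cur = 64 - 60 + 3 = 7.
 */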
5200 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5201 {
5202         unsigned cur;
5203         BUG_ON(offset > ring->buf_mask);
5204         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5205
5206         cur = (ring->wptr & ring->buf_mask) - 1;
5207         if (likely(cur > offset))
5208                 ring->ring[offset] = cur - offset;
5209         else
5210                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5211 }
5212
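/*
 * Ring-based register read: COPY_DATA from register space (src 0) into
 * memory (dst 5) with write confirm, targeting the writeback slot at
 * adev->virt.reg_val_offs where the SR-IOV register-read path picks the
 * value up.
 */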
5213 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5214 {
5215         struct amdgpu_device *adev = ring->adev;
5216
5217         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5218         amdgpu_ring_write(ring, 0 |     /* src: register*/
5219                                 (5 << 8) |      /* dst: memory */
5220                                 (1 << 20));     /* write confirm */
5221         amdgpu_ring_write(ring, reg);
5222         amdgpu_ring_write(ring, 0);
5223         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5224                                 adev->virt.reg_val_offs * 4));
5225         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5226                                 adev->virt.reg_val_offs * 4));
5227 }
5228
5229 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5230                                     uint32_t val)
5231 {
5232         uint32_t cmd = 0;
5233
5234         switch (ring->funcs->type) {
5235         case AMDGPU_RING_TYPE_GFX:
5236                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5237                 break;
5238         case AMDGPU_RING_TYPE_KIQ:
5239                 cmd = (1 << 16); /* no inc addr */
5240                 break;
5241         default:
5242                 cmd = WR_CONFIRM;
5243                 break;
5244         }
5245         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5246         amdgpu_ring_write(ring, cmd);
5247         amdgpu_ring_write(ring, reg);
5248         amdgpu_ring_write(ring, 0);
5249         amdgpu_ring_write(ring, val);
5250 }
5251
5252 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5253                                         uint32_t val, uint32_t mask)
5254 {
5255         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5256 }
5257
5258 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5259                                                   uint32_t reg0, uint32_t reg1,
5260                                                   uint32_t ref, uint32_t mask)
5261 {
5262         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5263         struct amdgpu_device *adev = ring->adev;
5264         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5265                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5266
5267         if (fw_version_ok)
5268                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5269                                       ref, mask, 0x20);
5270         else
5271                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5272                                                            ref, mask);
5273 }
5274
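/*
 * Soft recovery: emit an SQ_CMD with CHECK_VMID=1 so only waves owned by
 * the hung job's vmid are targeted; CMD 0x03 is the wave-kill command per
 * the SQ_CMD encoding, terminating the guilty waves without a full reset.
 */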
5275 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5276 {
5277         struct amdgpu_device *adev = ring->adev;
5278         uint32_t value = 0;
5279
5280         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5281         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5282         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5283         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5284         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5285 }
5286
5287 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5288                                                  enum amdgpu_interrupt_state state)
5289 {
5290         switch (state) {
5291         case AMDGPU_IRQ_STATE_DISABLE:
5292         case AMDGPU_IRQ_STATE_ENABLE:
5293                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5294                                TIME_STAMP_INT_ENABLE,
5295                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5296                 break;
5297         default:
5298                 break;
5299         }
5300 }
5301
5302 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5303                                                      int me, int pipe,
5304                                                      enum amdgpu_interrupt_state state)
5305 {
5306         u32 mec_int_cntl, mec_int_cntl_reg;
5307
5308         /*
5309          * amdgpu controls only the first MEC. That's why this function only
5310          * handles the setting of interrupts for this specific MEC. All other
5311          * pipes' interrupts are set by amdkfd.
5312          */
5313
5314         if (me == 1) {
5315                 switch (pipe) {
5316                 case 0:
5317                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5318                         break;
5319                 case 1:
5320                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5321                         break;
5322                 case 2:
5323                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5324                         break;
5325                 case 3:
5326                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5327                         break;
5328                 default:
5329                         DRM_DEBUG("invalid pipe %d\n", pipe);
5330                         return;
5331                 }
5332         } else {
5333                 DRM_DEBUG("invalid me %d\n", me);
5334                 return;
5335         }
5336
5337         switch (state) {
5338         case AMDGPU_IRQ_STATE_DISABLE:
5339                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5340                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5341                                              TIME_STAMP_INT_ENABLE, 0);
5342                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5343                 break;
5344         case AMDGPU_IRQ_STATE_ENABLE:
5345                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5346                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5347                                              TIME_STAMP_INT_ENABLE, 1);
5348                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5349                 break;
5350         default:
5351                 break;
5352         }
5353 }
5354
5355 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5356                                              struct amdgpu_irq_src *source,
5357                                              unsigned type,
5358                                              enum amdgpu_interrupt_state state)
5359 {
5360         switch (state) {
5361         case AMDGPU_IRQ_STATE_DISABLE:
5362         case AMDGPU_IRQ_STATE_ENABLE:
5363                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5364                                PRIV_REG_INT_ENABLE,
5365                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5366                 break;
5367         default:
5368                 break;
5369         }
5370
5371         return 0;
5372 }
5373
5374 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5375                                               struct amdgpu_irq_src *source,
5376                                               unsigned type,
5377                                               enum amdgpu_interrupt_state state)
5378 {
5379         switch (state) {
5380         case AMDGPU_IRQ_STATE_DISABLE:
5381         case AMDGPU_IRQ_STATE_ENABLE:
5382                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5383                                PRIV_INSTR_INT_ENABLE,
5384                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
5385         default:
5386                 break;
5387         }
5388
5389         return 0;
5390 }
5391
5392 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5393         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5394                         CP_ECC_ERROR_INT_ENABLE, 1)
5395
5396 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5397         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5398                         CP_ECC_ERROR_INT_ENABLE, 0)
5399
5400 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5401                                               struct amdgpu_irq_src *source,
5402                                               unsigned type,
5403                                               enum amdgpu_interrupt_state state)
5404 {
5405         switch (state) {
5406         case AMDGPU_IRQ_STATE_DISABLE:
5407                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5408                                 CP_ECC_ERROR_INT_ENABLE, 0);
5409                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5410                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5411                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5412                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5413                 break;
5414
5415         case AMDGPU_IRQ_STATE_ENABLE:
5416                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5417                                 CP_ECC_ERROR_INT_ENABLE, 1);
5418                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5419                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5420                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5421                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5422                 break;
5423         default:
5424                 break;
5425         }
5426
5427         return 0;
5428 }
5429
5430
5431 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5432                                             struct amdgpu_irq_src *src,
5433                                             unsigned type,
5434                                             enum amdgpu_interrupt_state state)
5435 {
5436         switch (type) {
5437         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5438                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5439                 break;
5440         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5441                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5442                 break;
5443         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5444                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5445                 break;
5446         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5447                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5448                 break;
5449         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5450                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5451                 break;
5452         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5453                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5454                 break;
5455         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5456                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5457                 break;
5458         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5459                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5460                 break;
5461         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5462                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5463                 break;
5464         default:
5465                 break;
5466         }
5467         return 0;
5468 }
5469
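/*
 * IV ring_id layout as decoded below: me in bits [3:2], pipe in bits
 * [1:0], queue in bits [6:4].  me 0 maps to the single gfx ring; me 1/2
 * are matched against the compute rings.
 */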
5470 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5471                             struct amdgpu_irq_src *source,
5472                             struct amdgpu_iv_entry *entry)
5473 {
5474         int i;
5475         u8 me_id, pipe_id, queue_id;
5476         struct amdgpu_ring *ring;
5477
5478         DRM_DEBUG("IH: CP EOP\n");
5479         me_id = (entry->ring_id & 0x0c) >> 2;
5480         pipe_id = (entry->ring_id & 0x03) >> 0;
5481         queue_id = (entry->ring_id & 0x70) >> 4;
5482
5483         switch (me_id) {
5484         case 0:
5485                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5486                 break;
5487         case 1:
5488         case 2:
5489                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5490                         ring = &adev->gfx.compute_ring[i];
5491                         /* Per-queue interrupt is supported for MEC starting from VI.
5492                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5493                          */
5494                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5495                                 amdgpu_fence_process(ring);
5496                 }
5497                 break;
5498         }
5499         return 0;
5500 }
5501
5502 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5503                            struct amdgpu_iv_entry *entry)
5504 {
5505         u8 me_id, pipe_id, queue_id;
5506         struct amdgpu_ring *ring;
5507         int i;
5508
5509         me_id = (entry->ring_id & 0x0c) >> 2;
5510         pipe_id = (entry->ring_id & 0x03) >> 0;
5511         queue_id = (entry->ring_id & 0x70) >> 4;
5512
5513         switch (me_id) {
5514         case 0:
5515                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5516                 break;
5517         case 1:
5518         case 2:
5519                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5520                         ring = &adev->gfx.compute_ring[i];
5521                         if (ring->me == me_id && ring->pipe == pipe_id &&
5522                             ring->queue == queue_id)
5523                                 drm_sched_fault(&ring->sched);
5524                 }
5525                 break;
5526         }
5527 }
5528
5529 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5530                                  struct amdgpu_irq_src *source,
5531                                  struct amdgpu_iv_entry *entry)
5532 {
5533         DRM_ERROR("Illegal register access in command stream\n");
5534         gfx_v9_0_fault(adev, entry);
5535         return 0;
5536 }
5537
5538 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5539                                   struct amdgpu_irq_src *source,
5540                                   struct amdgpu_iv_entry *entry)
5541 {
5542         DRM_ERROR("Illegal instruction in command stream\n");
5543         gfx_v9_0_fault(adev, entry);
5544         return 0;
5545 }
5546
5547
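/*
 * Vega20 GC EDC counter map: each entry names a memory instance, the
 * register that counts its errors, and the SEC (correctable) and DED
 * (uncorrectable) bitfields.  Entries whose second field pair is 0 only
 * expose a single-error-detect (SED) count.
 */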
5548 static const struct soc15_ras_field_entry gc_ras_fields_vg20[] = {
5549         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5550           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5551           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5552         },
5553         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5554           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5555           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5556         },
5557         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5558           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5559           0, 0
5560         },
5561         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5562           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5563           0, 0
5564         },
5565         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5566           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5567           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5568         },
5569         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5570           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5571           0, 0
5572         },
5573         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5574           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5575           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5576         },
5577         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5578           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5579           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5580         },
5581         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5582           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5583           0, 0
5584         },
5585         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5586           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5587           0, 0
5588         },
5589         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5590           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5591           0, 0
5592         },
5593         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5594           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5595           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5596         },
5597         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5598           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5599           0, 0
5600         },
5601         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5602           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5603           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5604         },
5605         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5606           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5607           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5608           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5609         },
5610         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5611           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5612           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5613           0, 0
5614         },
5615         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5616           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5617           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5618           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5619         },
5620         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5621           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5622           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5623           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5624         },
5625         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5626           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5627           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5628           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5629         },
5630         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5631           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5632           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5633           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5634         },
5635         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5636           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5637           0, 0
5638         },
5639         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5640           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5641           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5642         },
5643         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5644           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5645           0, 0
5646         },
5647         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5648           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5649           0, 0
5650         },
5651         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5652           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5653           0, 0
5654         },
5655         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5656           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5657           0, 0
5658         },
5659         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5660           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5661           0, 0
5662         },
5663         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5664           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5665           0, 0
5666         },
5667         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5668           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5669           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5670         },
5671         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5672           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5673           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5674         },
5675         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5676           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5677           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5678         },
5679         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5680           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5681           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5682         },
5683         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5684           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5685           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5686         },
5687         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5688           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5689           0, 0
5690         },
5691         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5692           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5693           0, 0
5694         },
5695         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5696           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5697           0, 0
5698         },
5699         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5700           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5701           0, 0
5702         },
5703         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5704           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5705           0, 0
5706         },
5707         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5708           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5709           0, 0
5710         },
5711         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5712           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5713           0, 0
5714         },
5715         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5716           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5717           0, 0
5718         },
5719         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5720           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5721           0, 0
5722         },
5723         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5724           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5725           0, 0
5726         },
5727         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5728           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5729           0, 0
5730         },
5731         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5732           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5733           0, 0
5734         },
5735         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5736           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5737           0, 0
5738         },
5739         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5740           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5741           0, 0
5742         },
5743         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5744           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5745           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5746         },
5747         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5748           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5749           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5750         },
5751         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5752           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5753           0, 0
5754         },
5755         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5756           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5757           0, 0
5758         },
5759         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5760           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5761           0, 0
5762         },
5763         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5764           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5765           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5766         },
5767         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5768           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5769           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5770         },
5771         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5772           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5773           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
5774         },
5775         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5776           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5777           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
5778         },
5779         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5780           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
5781           0, 0
5782         },
5783         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5784           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5785           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
5786         },
5787         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5788           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5789           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
5790         },
5791         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5792           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
5793           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
5794         },
5795         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5796           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5797           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
5798         },
5799         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5800           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5801           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
5802         },
5803         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5804           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5805           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
5806         },
5807         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5808           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5809           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
5810         },
5811         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5812           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5813           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
5814         },
5815         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5816           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5817           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
5818         },
5819         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5820           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5821           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
5822         },
5823         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5824           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5825           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
5826         },
5827         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5828           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5829           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
5830         },
5831         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5832           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5833           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
5834         },
5835         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5836           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5837           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
5838         },
5839         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5840           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5841           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
5842         },
5843         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5844           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5845           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
5846         },
5847         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5848           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5849           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
5850         },
5851         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5852           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5853           0, 0
5854         },
5855         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5856           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
5857           0, 0
5858         },
5859         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5860           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
5861           0, 0
5862         },
5863         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5864           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
5865           0, 0
5866         },
5867         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5868           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
5869           0, 0
5870         },
5871         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5872           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5873           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
5874         },
5875         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5876           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5877           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
5878         },
5879         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5880           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5881           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
5882         },
5883         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5884           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5885           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
5886         },
5887         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5888           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5889           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
5890         },
5891         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5892           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5893           0, 0
5894         },
5895         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5896           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
5897           0, 0
5898         },
5899         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5900           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
5901           0, 0
5902         },
5903         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5904           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
5905           0, 0
5906         },
5907         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5908           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
5909           0, 0
5910         },
5911         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5912           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5913           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
5914         },
5915         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5916           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5917           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
5918         },
5919         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5920           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5921           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
5922         },
5923         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5924           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5925           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
5926         },
5927         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5928           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5929           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
5930         },
5931         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5932           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
5933           0, 0
5934         },
5935         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5936           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
5937           0, 0
5938         },
5939         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5940           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
5941           0, 0
5942         },
5943         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5944           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
5945           0, 0
5946         },
5947         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5948           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
5949           0, 0
5950         },
5951         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5952           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
5953           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
5954         },
5955         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5956           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
5957           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
5958         },
5959         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5960           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
5961           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
5962         },
5963         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5964           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
5965           0, 0
5966         },
5967         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5968           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
5969           0, 0
5970         },
5971         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5972           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
5973           0, 0
5974         },
5975         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5976           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
5977           0, 0
5978         },
5979         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5980           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
5981           0, 0
5982         },
5983         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5984           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
5985           0, 0
5986         }
5987 };
5988
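/*
 * Inject a RAS error into a GFX sub-block via the PSP RAS TA.
 * Only supported on Vega20; the requested sub-block and error type are
 * validated against the ras_gfx_subblocks table before the trigger-error
 * request is handed to the PSP under grbm_idx_mutex.
 */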
5989 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
5990                                      void *inject_if)
5991 {
5992         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
5993         int ret;
5994         struct ta_ras_trigger_error_input block_info = { 0 };
5995
5996         if (adev->asic_type != CHIP_VEGA20)
5997                 return -EINVAL;
5998
5999         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6000                 return -EINVAL;
6001
6002         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6003                 return -EPERM;
6004
6005         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6006               info->head.type)) {
6007                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6008                         ras_gfx_subblocks[info->head.sub_block_index].name,
6009                         info->head.type);
6010                 return -EPERM;
6011         }
6012
6013         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6014               info->head.type)) {
6015                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6016                         ras_gfx_subblocks[info->head.sub_block_index].name,
6017                         info->head.type);
6018                 return -EPERM;
6019         }
6020
6021         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6022         block_info.sub_block_index =
6023                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6024         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6025         block_info.address = info->address;
6026         block_info.value = info->value;
6027
6028         mutex_lock(&adev->grbm_idx_mutex);
6029         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6030         mutex_unlock(&adev->grbm_idx_mutex);
6031
6032         return ret;
6033 }
6034
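/*
 * Human-readable names for the UTC VML2, VML2 walker and ATC L2 memory
 * instances, indexed by the value written to the corresponding EDC/ECC
 * INDEX register in gfx_v9_0_query_utc_edc_status() below.
 */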
6035 static const char *vml2_mems[] = {
6036         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6037         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6038         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6039         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6040         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6041         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6042         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6043         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6044         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6045         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6046         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6047         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6048         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6049         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6050         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6051         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6052 };
6053
6054 static const char *vml2_walker_mems[] = {
6055         "UTC_VML2_CACHE_PDE0_MEM0",
6056         "UTC_VML2_CACHE_PDE0_MEM1",
6057         "UTC_VML2_CACHE_PDE1_MEM0",
6058         "UTC_VML2_CACHE_PDE1_MEM1",
6059         "UTC_VML2_CACHE_PDE2_MEM0",
6060         "UTC_VML2_CACHE_PDE2_MEM1",
6061         "UTC_VML2_RDIF_LOG_FIFO",
6062 };
6063
6064 static const char *atc_l2_cache_2m_mems[] = {
6065         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6066         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6067         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6068         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6069 };
6070
6071 static const char *atc_l2_cache_4k_mems[] = {
6072         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6073         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6074         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6075         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6076         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6077         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6078         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6079         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6080         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6081         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6082         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6083         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6084         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6085         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6086         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6087         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6088         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6089         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6090         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6091         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6092         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6093         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6094         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6095         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6096         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6097         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6098         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6099         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6100         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6101         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6102         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6103         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6104 };
6105
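/*
 * Query the UTC (VML2, VML2 walker, ATC L2) EDC counters.  The counters
 * are cleared at the start of the query; each memory instance is then
 * selected through its *_INDEX register and its SEC/DED counts are
 * accumulated into err_data (SEC -> ce_count, DED -> ue_count).
 */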
6106 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6107                                          struct ras_err_data *err_data)
6108 {
6109         uint32_t i, data;
6110         uint32_t sec_count, ded_count;
6111
6112         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6113         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6114         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6115         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6116         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6117         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6118         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6119         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6120
6121         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6122                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6123                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6124
6125                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6126                 if (sec_count) {
6127                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6128                                  vml2_mems[i], sec_count);
6129                         err_data->ce_count += sec_count;
6130                 }
6131
6132                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6133                 if (ded_count) {
6134                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6135                                  vml2_mems[i], ded_count);
6136                         err_data->ue_count += ded_count;
6137                 }
6138         }
6139
6140         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6141                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6142                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6143
6144                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6145                                                 SEC_COUNT);
6146                 if (sec_count) {
6147                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6148                                  vml2_walker_mems[i], sec_count);
6149                         err_data->ce_count += sec_count;
6150                 }
6151
6152                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6153                                                 DED_COUNT);
6154                 if (ded_count) {
6155                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6156                                  vml2_walker_mems[i], ded_count);
6157                         err_data->ue_count += ded_count;
6158                 }
6159         }
6160
6161         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6162                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6163                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6164
6165                 sec_count = (data & 0x00006000L) >> 0xd; /* SEC count: bits 14:13 */
6166                 if (sec_count) {
6167                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6168                                  atc_l2_cache_2m_mems[i], sec_count);
6169                         err_data->ce_count += sec_count;
6170                 }
6171         }
6172
6173         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6174                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6175                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6176
6177                 sec_count = (data & 0x00006000L) >> 0xd; /* SEC count: bits 14:13 */
6178                 if (sec_count) {
6179                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6180                                  atc_l2_cache_4k_mems[i], sec_count);
6181                         err_data->ce_count += sec_count;
6182                 }
6183
6184                 ded_count = (data & 0x00018000L) >> 0xf; /* DED count: bits 16:15 */
6185                 if (ded_count) {
6186                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6187                                  atc_l2_cache_4k_mems[i], ded_count);
6188                         err_data->ue_count += ded_count;
6189                 }
6190         }
6191
6192         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6193         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6194         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6195         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6196
6197         return 0;
6198 }
6199
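/*
 * Decode one raw SEC/DED counter register value: scan gc_ras_fields_vg20
 * for entries matching this register and accumulate each sub-block's
 * SEC/DED bitfields into *sec_count and *ded_count, logging any hits.
 */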
6200 static int __get_ras_error_count(const struct soc15_reg_entry *reg,
6201         uint32_t se_id, uint32_t inst_id, uint32_t value,
6202         uint32_t *sec_count, uint32_t *ded_count)
6203 {
6204         uint32_t i;
6205         uint32_t sec_cnt, ded_cnt;
6206
6207         for (i = 0; i < ARRAY_SIZE(gc_ras_fields_vg20); i++) {
6208                 if (gc_ras_fields_vg20[i].reg_offset != reg->reg_offset ||
6209                         gc_ras_fields_vg20[i].seg != reg->seg ||
6210                         gc_ras_fields_vg20[i].inst != reg->inst)
6211                         continue;
6212
6213                 sec_cnt = (value &
6214                                 gc_ras_fields_vg20[i].sec_count_mask) >>
6215                                 gc_ras_fields_vg20[i].sec_count_shift;
6216                 if (sec_cnt) {
6217                         DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
6218                                 gc_ras_fields_vg20[i].name,
6219                                 se_id, inst_id,
6220                                 sec_cnt);
6221                         *sec_count += sec_cnt;
6222                 }
6223
6224                 ded_cnt = (value &
6225                                 gc_ras_fields_vg20[i].ded_count_mask) >>
6226                                 gc_ras_fields_vg20[i].ded_count_shift;
6227                 if (ded_cnt) {
6228                         DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
6229                                 gc_ras_fields_vg20[i].name,
6230                                 se_id, inst_id,
6231                                 ded_cnt);
6232                         *ded_count += ded_cnt;
6233                 }
6234         }
6235
6236         return 0;
6237 }
6238
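/*
 * Query the GFX RAS error counts (Vega20 only): walk every
 * sec_ded_counter_registers entry across all shader engines and
 * instances via gfx_v9_0_select_se_sh(), fold the per-sub-block
 * SEC/DED counts into err_data, then append the UTC EDC status.
 */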
6239 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6240                                           void *ras_error_status)
6241 {
6242         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6243         uint32_t sec_count = 0, ded_count = 0;
6244         uint32_t i, j, k;
6245         uint32_t reg_value;
6246
6247         if (adev->asic_type != CHIP_VEGA20)
6248                 return -EINVAL;
6249
6250         err_data->ue_count = 0;
6251         err_data->ce_count = 0;
6252
6253         mutex_lock(&adev->grbm_idx_mutex);
6254
6255         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
6256                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
6257                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
6258                                 gfx_v9_0_select_se_sh(adev, j, 0, k);
6259                                 reg_value =
6260                                         RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
6261                                 if (reg_value)
6262                                         __get_ras_error_count(&sec_ded_counter_registers[i],
6263                                                         j, k, reg_value,
6264                                                         &sec_count, &ded_count);
6265                         }
6266                 }
6267         }
6268
6269         err_data->ce_count += sec_count;
6270         err_data->ue_count += ded_count;
6271
6272         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6273         mutex_unlock(&adev->grbm_idx_mutex);
6274
6275         gfx_v9_0_query_utc_edc_status(adev, err_data);
6276
6277         return 0;
6278 }
6279
6280 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6281         .name = "gfx_v9_0",
6282         .early_init = gfx_v9_0_early_init,
6283         .late_init = gfx_v9_0_late_init,
6284         .sw_init = gfx_v9_0_sw_init,
6285         .sw_fini = gfx_v9_0_sw_fini,
6286         .hw_init = gfx_v9_0_hw_init,
6287         .hw_fini = gfx_v9_0_hw_fini,
6288         .suspend = gfx_v9_0_suspend,
6289         .resume = gfx_v9_0_resume,
6290         .is_idle = gfx_v9_0_is_idle,
6291         .wait_for_idle = gfx_v9_0_wait_for_idle,
6292         .soft_reset = gfx_v9_0_soft_reset,
6293         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6294         .set_powergating_state = gfx_v9_0_set_powergating_state,
6295         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6296 };
6297
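/*
 * Ring callbacks for the GFX, compute and KIQ rings below.
 * emit_frame_size is the worst-case dword count of per-submission frame
 * overhead that must be reserved in the ring, excluding the IBs themselves.
 */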
6298 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6299         .type = AMDGPU_RING_TYPE_GFX,
6300         .align_mask = 0xff,
6301         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6302         .support_64bit_ptrs = true,
6303         .vmhub = AMDGPU_GFXHUB_0,
6304         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6305         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6306         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6307         .emit_frame_size = /* 242 dwords maximum in total, if 16 IBs */
6308                 5 +  /* COND_EXEC */
6309                 7 +  /* PIPELINE_SYNC */
6310                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6311                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6312                 2 +  /* VM_FLUSH */
6313                 8 +  /* FENCE for VM_FLUSH */
6314                 20 + /* GDS switch */
6315                 4 +  /* double SWITCH_BUFFER, the first COND_EXEC
6316                       * jumps to the place just prior to this
6317                       * double SWITCH_BUFFER */
6318                 5 +  /* COND_EXEC */
6319                 7 +  /* HDP_flush */
6320                 4 +  /* VGT_flush */
6321                 14 + /* CE_META */
6322                 31 + /* DE_META */
6323                 3 +  /* CNTX_CTRL */
6324                 5 +  /* HDP_INVL */
6325                 8 + 8 + /* FENCE x2 */
6326                 2, /* SWITCH_BUFFER */
6327         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6328         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6329         .emit_fence = gfx_v9_0_ring_emit_fence,
6330         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6331         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6332         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6333         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6334         .test_ring = gfx_v9_0_ring_test_ring,
6335         .test_ib = gfx_v9_0_ring_test_ib,
6336         .insert_nop = amdgpu_ring_insert_nop,
6337         .pad_ib = amdgpu_ring_generic_pad_ib,
6338         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6339         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6340         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6341         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6342         .emit_tmz = gfx_v9_0_ring_emit_tmz,
6343         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6344         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6345         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6346         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6347 };
6348
6349 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6350         .type = AMDGPU_RING_TYPE_COMPUTE,
6351         .align_mask = 0xff,
6352         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6353         .support_64bit_ptrs = true,
6354         .vmhub = AMDGPU_GFXHUB_0,
6355         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6356         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6357         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6358         .emit_frame_size =
6359                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6360                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6361                 5 + /* hdp invalidate */
6362                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6363                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6364                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6365                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6366                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6367         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6368         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6369         .emit_fence = gfx_v9_0_ring_emit_fence,
6370         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6371         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6372         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6373         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6374         .test_ring = gfx_v9_0_ring_test_ring,
6375         .test_ib = gfx_v9_0_ring_test_ib,
6376         .insert_nop = amdgpu_ring_insert_nop,
6377         .pad_ib = amdgpu_ring_generic_pad_ib,
6378         .set_priority = gfx_v9_0_ring_set_priority_compute,
6379         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6380         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6381         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6382 };
6383
6384 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6385         .type = AMDGPU_RING_TYPE_KIQ,
6386         .align_mask = 0xff,
6387         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6388         .support_64bit_ptrs = true,
6389         .vmhub = AMDGPU_GFXHUB_0,
6390         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6391         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6392         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6393         .emit_frame_size =
6394                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6395                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6396                 5 + /* hdp invalidate */
6397                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6398                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6399                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6400                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6401                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6402         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6403         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6404         .test_ring = gfx_v9_0_ring_test_ring,
6405         .insert_nop = amdgpu_ring_insert_nop,
6406         .pad_ib = amdgpu_ring_generic_pad_ib,
6407         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6408         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6409         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6410         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6411 };
6412
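/* Wire up the ring function tables for the KIQ, GFX and compute rings. */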
6413 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6414 {
6415         int i;
6416
6417         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6418
6419         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6420                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6421
6422         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6423                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6424 }
6425
6426 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6427         .set = gfx_v9_0_set_eop_interrupt_state,
6428         .process = gfx_v9_0_eop_irq,
6429 };
6430
6431 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6432         .set = gfx_v9_0_set_priv_reg_fault_state,
6433         .process = gfx_v9_0_priv_reg_irq,
6434 };
6435
6436 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6437         .set = gfx_v9_0_set_priv_inst_fault_state,
6438         .process = gfx_v9_0_priv_inst_irq,
6439 };
6440
6441 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6442         .set = gfx_v9_0_set_cp_ecc_error_state,
6443         .process = amdgpu_gfx_cp_ecc_error_irq,
6444 };
6445
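/*
 * Attach the handler tables for the EOP, privileged register/instruction
 * fault and CP ECC error interrupt sources.
 */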
6447 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6448 {
6449         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6450         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6451
6452         adev->gfx.priv_reg_irq.num_types = 1;
6453         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6454
6455         adev->gfx.priv_inst_irq.num_types = 1;
6456         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6457
6458         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6459         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6460 }
6461
6462 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6463 {
6464         switch (adev->asic_type) {
6465         case CHIP_VEGA10:
6466         case CHIP_VEGA12:
6467         case CHIP_VEGA20:
6468         case CHIP_RAVEN:
6469         case CHIP_ARCTURUS:
6470         case CHIP_RENOIR:
6471                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6472                 break;
6473         default:
6474                 break;
6475         }
6476 }
6477
6478 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6479 {
6480         /* init asic gds info */
6481         switch (adev->asic_type) {
6482         case CHIP_VEGA10:
6483         case CHIP_VEGA12:
6484         case CHIP_VEGA20:
6485                 adev->gds.gds_size = 0x10000;
6486                 break;
6487         case CHIP_RAVEN:
6488         case CHIP_ARCTURUS:
6489                 adev->gds.gds_size = 0x1000;
6490                 break;
6491         default:
6492                 adev->gds.gds_size = 0x10000;
6493                 break;
6494         }
6495
6496         switch (adev->asic_type) {
6497         case CHIP_VEGA10:
6498         case CHIP_VEGA20:
6499                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6500                 break;
6501         case CHIP_VEGA12:
6502                 adev->gds.gds_compute_max_wave_id = 0x27f;
6503                 break;
6504         case CHIP_RAVEN:
6505                 if (adev->rev_id >= 0x8)
6506                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6507                 else
6508                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6509                 break;
6510         case CHIP_ARCTURUS:
6511                 adev->gds.gds_compute_max_wave_id = 0xfff;
6512                 break;
6513         default:
6514                 /* this really depends on the chip */
6515                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6516                 break;
6517         }
6518
6519         adev->gds.gws_size = 64;
6520         adev->gds.oa_size = 16;
6521 }
6522
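/*
 * Program the user-requested inactive-CU mask for the currently
 * selected SE/SH (see gfx_v9_0_select_se_sh()).
 */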
6523 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6524                                                  u32 bitmap)
6525 {
6526         u32 data;
6527
6528         if (!bitmap)
6529                 return;
6530
6531         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6532         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6533
6534         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6535 }
6536
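/*
 * Combine the fused-off (CC_GC_SHADER_ARRAY_CONFIG) and user-disabled
 * (GC_USER_SHADER_ARRAY_CONFIG) inactive-CU masks for the selected SE/SH
 * and return the inverted result, limited to max_cu_per_sh bits.
 */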
6537 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6538 {
6539         u32 data, mask;
6540
6541         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6542         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6543
6544         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6545         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6546
6547         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6548
6549         return (~data) & mask;
6550 }
6551
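/*
 * Build cu_info for the ASIC: per-SE/SH active-CU bitmaps, the total
 * active CU count and the always-on (AO) CU mask, honouring any CU
 * disable masks parsed by amdgpu_gfx_parse_disable_cu().
 */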
6552 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6553                                  struct amdgpu_cu_info *cu_info)
6554 {
6555         int i, j, k, counter, active_cu_number = 0;
6556         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6557         unsigned disable_masks[4 * 4];
6558
6559         if (!adev || !cu_info)
6560                 return -EINVAL;
6561
6562         /*
6563          * The 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs.
6564          */
6565         if (adev->gfx.config.max_shader_engines *
6566                 adev->gfx.config.max_sh_per_se > 16)
6567                 return -EINVAL;
6568
6569         amdgpu_gfx_parse_disable_cu(disable_masks,
6570                                     adev->gfx.config.max_shader_engines,
6571                                     adev->gfx.config.max_sh_per_se);
6572
6573         mutex_lock(&adev->grbm_idx_mutex);
6574         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6575                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6576                         mask = 1;
6577                         ao_bitmap = 0;
6578                         counter = 0;
6579                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6580                         gfx_v9_0_set_user_cu_inactive_bitmap(
6581                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6582                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6583
6584                         /*
6585                          * The bitmap (and ao_cu_bitmap) in the cu_info structure
6586                          * is a 4x4 array, which suits Vega ASICs with their
6587                          * 4*2 SE/SH layout.
6588                          * Arcturus, however, uses an 8*1 SE/SH layout.
6589                          * To minimize the impact, the extra SEs are folded
6590                          * into the existing bitmap array as follows:
6591                          *    SE4,SH0 --> bitmap[0][1]
6592                          *    SE5,SH0 --> bitmap[1][1]
6593                          *    SE6,SH0 --> bitmap[2][1]
6594                          *    SE7,SH0 --> bitmap[3][1]
6595                          */
6596                         cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6597
6598                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6599                                 if (bitmap & mask) {
6600                                         if (counter < adev->gfx.config.max_cu_per_sh)
6601                                                 ao_bitmap |= mask;
6602                                         counter++;
6603                                 }
6604                                 mask <<= 1;
6605                         }
6606                         active_cu_number += counter;
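                        /* only SE0/SE1, SH0/SH1 fit in the 32-bit ao_cu_mask */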
6607                         if (i < 2 && j < 2)
6608                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6609                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6610                 }
6611         }
6612         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6613         mutex_unlock(&adev->grbm_idx_mutex);
6614
6615         cu_info->number = active_cu_number;
6616         cu_info->ao_cu_mask = ao_cu_mask;
6617         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6618
6619         return 0;
6620 }
6621
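/*
 * GFX v9.0 IP block descriptor; registered per ASIC from the SoC setup
 * code (see soc15.c) via amdgpu_device_ip_block_add().
 */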
6622 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6623 {
6624         .type = AMD_IP_BLOCK_TYPE_GFX,
6625         .major = 9,
6626         .minor = 0,
6627         .rev = 0,
6628         .funcs = &gfx_v9_0_ip_funcs,
6629 };