Merge tag 'defconfig-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc
[linux-2.6-microblaze.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "amdgpu_ring.h"
33 #include "vi.h"
34 #include "vi_structs.h"
35 #include "vid.h"
36 #include "amdgpu_ucode.h"
37 #include "amdgpu_atombios.h"
38 #include "atombios_i2c.h"
39 #include "clearstate_vi.h"
40
41 #include "gmc/gmc_8_2_d.h"
42 #include "gmc/gmc_8_2_sh_mask.h"
43
44 #include "oss/oss_3_0_d.h"
45 #include "oss/oss_3_0_sh_mask.h"
46
47 #include "bif/bif_5_0_d.h"
48 #include "bif/bif_5_0_sh_mask.h"
49 #include "gca/gfx_8_0_d.h"
50 #include "gca/gfx_8_0_enum.h"
51 #include "gca/gfx_8_0_sh_mask.h"
52
53 #include "dce/dce_10_0_d.h"
54 #include "dce/dce_10_0_sh_mask.h"
55
56 #include "smu/smu_7_1_3_d.h"
57
58 #include "ivsrcid/ivsrcid_vislands30.h"
59
/*
 * Driver-local sizing constants for GFX8: the number of graphics rings (1)
 * and the MEC HPD size (4096 -- presumably bytes; TODO confirm at the point
 * of use, which is outside this chunk).
 */
60 #define GFX8_NUM_GFX_RINGS     1
61 #define GFX8_MEC_HPD_SIZE 4096
62
/*
 * Per-ASIC GB_ADDR_CONFIG "golden" constants. They match the
 * mmGB_ADDR_CONFIG values that appear in the *_golden_common_all tables of
 * this file: 0x22010001 (iceland_ and cz_ tables), 0x22011002 (polaris11_
 * table) and 0x22011003 (tonga_ table).
 */
63 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
64 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
65 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
66 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
67
/*
 * Field-placement helpers: each macro left-shifts its argument by the
 * matching GB_TILE_MODE0__*__SHIFT or GB_MACROTILE_MODE0__*__SHIFT constant.
 * No masking is applied, so the caller must pass a value that already fits
 * the destination field. The argument is parenthesized and evaluated once.
 */
68 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
69 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
70 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
71 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
72 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
73 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
74 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
75 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
76 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
77
/*
 * Single-bit masks for RLC_CGTT_MGCG_OVERRIDE, covering bits 0 through 5 in
 * the order CPF, RLC, MGCG, CGCG, CGLS, GRBM.
 */
78 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
79 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
80 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
81 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
82 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
83 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
84
/*
 * Two command codes with the values 1 and 0.
 * NOTE(review): "SET"/"CLE" presumably stand for set/clear of a BPM SERDES
 * command -- confirm at the users, which are outside this chunk.
 */
85 /* BPM SERDES CMD */
86 #define SET_BPM_SERDES_CMD    1
87 #define CLE_BPM_SERDES_CMD    0
88
/*
 * Anonymous enum numbering the BPM registers consecutively from 0
 * (BPM_REG_CGLS_EN) to 4 (BPM_REG_FGCG_OVERRIDE); BPM_REG_FGCG_MAX therefore
 * evaluates to 5.
 * NOTE(review): BPM_REG_FGCG_MAX is presumably an entry count / end marker
 * rather than a real register -- confirm at the users.
 */
89 /* BPM Register Address */
90 enum {
91         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
92         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
93         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
94         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
95         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
96         BPM_REG_FGCG_MAX
97 };
98
/*
 * Constant 14.
 * NOTE(review): going by the name, presumably the length of an RLC
 * "format direct" register list -- the unit and the consumer are not visible
 * in this chunk, so confirm before relying on it.
 */
99 #define RLC_FormatDirectRegListLength        14
100
/*
 * Firmware image names declared with MODULE_FIRMWARE(), grouped by ASIC:
 * carrizo, stoney, tonga, topaz, fiji, polaris10, polaris11, polaris12 and
 * vegam. Every group lists ce, pfp, me, mec and rlc images. The stoney and
 * topaz groups have no mec2 entry. The three polaris groups additionally list
 * a "_2" variant of every image except rlc.
 */
101 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
104 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
105 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
106 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
107
108 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
109 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
111 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
112 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
117 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
118 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
119 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
120
121 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
122 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
124 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
125 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
126
127 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
130 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
131 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
133
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
143 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
145
146 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
153 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
155 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
157
158 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
165 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
166 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
167 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
168 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
169
170 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
172 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
173 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
174 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
175 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
176
/*
 * GDS register set for each of the 16 VMIDs (0..15); the array index is the
 * VMID number. Every initializer lists, in order, that VMID's BASE, SIZE,
 * GWS and OA registers. The member names of struct amdgpu_gds_reg_offset are
 * declared outside this chunk.
 */
177 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
178 {
179         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
180         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
181         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
182         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
183         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
184         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
185         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
186         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
187         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
188         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
189         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
190         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
191         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
192         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
193         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
194         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
195 };
196
/*
 * Tonga "golden settings" table: a flat u32 list laid out as rows of three
 * values -- a register (mm*) followed by two 32-bit constants.
 * NOTE(review): the two constants are presumably a mask and the value to
 * apply under it; the code that consumes this table is not in this chunk.
 * Some rows carry value bits outside their mask (e.g. mmRLC_CGCG_CGLS_CTRL:
 * 0x00000003 / 0x0000003c, mmTCC_CTRL: 0x00100000 / 0xf31fff7f), so check how
 * the consumer combines the two before editing any row.
 */
197 static const u32 golden_settings_tonga_a11[] =
198 {
199         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
200         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
201         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
202         mmGB_GPU_ID, 0x0000000f, 0x00000000,
203         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
204         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
205         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
206         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
207         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
208         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
209         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
210         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
211         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
212         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
213         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
214         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
215 };
216
/*
 * Tonga common golden table: rows of three u32 values (register, then two
 * constants -- presumably mask and value; the consumer is outside this
 * chunk). Every row uses the full 0xffffffff as its second value. The
 * mmGB_ADDR_CONFIG entry equals TONGA_GB_ADDR_CONFIG_GOLDEN (0x22011003).
 * mmGRBM_GFX_INDEX comes first; NOTE(review): row order is presumably
 * significant, so do not reorder.
 */
217 static const u32 tonga_golden_common_all[] =
218 {
219         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
220         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
221         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
222         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
223         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
224         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
225         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
226         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
227 };
228
/*
 * Tonga MGCG/CGCG init table: rows of three u32 values (register, then two
 * constants -- presumably mask and value; the consumer is outside this
 * chunk).
 *
 * Layout, in order:
 *   - mmRLC_CGTT_MGCG_OVERRIDE set to 0xffffffff;
 *   - mmGRBM_GFX_INDEX = 0xe0000000, then the CGTT_* / *_CGTT_* clock
 *     control registers;
 *   - mmGRBM_GFX_INDEX = 0xe0000000 again, then five CGTS rows for each of
 *     CU0..CU7 (CU0 and CU4 use a TA_SQC register where the others use TA);
 *   - mmCGTS_SM_CTRL_REG, mmCP_RB_WPTR_POLL_CNTL, mmRLC_CGCG_CGLS_CTRL and
 *     mmCP_MEM_SLP_CNTL.
 *
 * All rows use 0xffffffff as the second value except the final
 * mmCP_MEM_SLP_CNTL row (0x00000001).
 * NOTE(review): row order is presumably significant (the repeated
 * mmGRBM_GFX_INDEX rows suggest sequencing), so do not sort or deduplicate.
 */
229 static const u32 tonga_mgcg_cgcg_init[] =
230 {
231         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
232         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
233         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
234         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
236         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
237         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
238         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
239         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
240         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
241         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
242         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
244         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
245         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
246         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
247         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
248         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
249         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
250         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
251         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
252         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
253         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
254         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
255         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
256         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
257         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
258         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
259         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
260         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
261         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
262         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
263         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
264         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
265         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
266         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
267         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
268         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
269         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
270         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
271         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
272         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
273         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
274         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
275         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
276         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
277         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
278         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
279         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
280         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
281         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
282         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
283         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
284         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
285         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
286         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
287         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
288         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
289         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
290         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
291         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
292         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
293         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
294         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
295         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
296         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
297         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
298         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
299         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
300         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
301         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
302         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
303         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
304         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
305         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
306 };
307
/*
 * VegaM "golden settings" table: rows of three u32 values (register, then
 * two constants -- presumably mask and value; the consumer is outside this
 * chunk). Unlike the vegam_golden_common_all table, most rows here use a
 * partial second value rather than 0xffffffff.
 */
308 static const u32 golden_settings_vegam_a11[] =
309 {
310         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
311         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
312         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
313         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
314         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
315         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
316         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
317         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
318         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
319         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
320         mmSQ_CONFIG, 0x07f80000, 0x01180000,
321         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
322         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
323         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
324         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
325         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
326         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
327 };
328
/*
 * VegaM common golden table: rows of three u32 values (register, then two
 * constants -- presumably mask and value; the consumer is outside this
 * chunk). Every row uses 0xffffffff as the second value. Unlike the
 * tonga/polaris10/fiji common tables, there are no mmPA_SC_RASTER_CONFIG
 * rows here.
 */
329 static const u32 vegam_golden_common_all[] =
330 {
331         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
332         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
333         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
334         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
335         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
336         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
337 };
338
/*
 * Polaris11 "golden settings" table: rows of three u32 values (register,
 * then two constants -- presumably mask and value; the consumer is outside
 * this chunk). Lists the same registers, in the same order, as
 * golden_settings_vegam_a11, with different constants on several rows.
 */
339 static const u32 golden_settings_polaris11_a11[] =
340 {
341         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
342         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
343         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
344         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
345         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
346         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
347         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
348         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
349         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
350         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
351         mmSQ_CONFIG, 0x07f80000, 0x01180000,
352         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
353         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
354         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
355         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
356         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
357         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
358 };
359
/*
 * Polaris11 common golden table: rows of three u32 values (register, then
 * two constants -- presumably mask and value; the consumer is outside this
 * chunk). Every row uses 0xffffffff as the second value. The
 * mmGB_ADDR_CONFIG entry equals POLARIS11_GB_ADDR_CONFIG_GOLDEN
 * (0x22011002).
 */
360 static const u32 polaris11_golden_common_all[] =
361 {
362         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
363         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
364         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
365         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
366         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
367         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
368 };
369
/*
 * Polaris10 "golden settings" table: rows of three u32 values (register,
 * then two constants -- presumably mask and value; the consumer is outside
 * this chunk). Compared with the polaris11/vegam a11 tables in this file it
 * starts with an extra mmATC_MISC_CG row and has no mmTCP_CHAN_STEER_LO row.
 */
370 static const u32 golden_settings_polaris10_a11[] =
371 {
372         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
373         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
374         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
375         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
376         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
377         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
378         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
379         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
380         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
381         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
382         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
383         mmSQ_CONFIG, 0x07f80000, 0x07180000,
384         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
385         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
386         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
387         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
388         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
389 };
390
/*
 * Polaris10 common golden table: rows of three u32 values (register, then
 * two constants -- presumably mask and value; the consumer is outside this
 * chunk). Every row uses 0xffffffff as the second value. The register list
 * and constants are the same as in tonga_golden_common_all.
 */
391 static const u32 polaris10_golden_common_all[] =
392 {
393         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
394         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
395         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
396         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
397         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
398         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
399         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
400         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
401 };
402
/*
 * Fiji common golden table: rows of three u32 values (register, then two
 * constants -- presumably mask and value; the consumer is outside this
 * chunk). mmGRBM_GFX_INDEX = 0xe0000000 appears twice: at the top and again
 * immediately before the final mmSPI_CONFIG_CNTL_1 row, which is the only
 * row whose second value is not 0xffffffff.
 * NOTE(review): row order is presumably significant; do not deduplicate the
 * repeated mmGRBM_GFX_INDEX row without checking the consumer.
 */
403 static const u32 fiji_golden_common_all[] =
404 {
405         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
406         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
407         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
408         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
409         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
410         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
411         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
412         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
413         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
414         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
415 };
416
/*
 * Fiji "golden settings" table: rows of three u32 values (register, then two
 * constants -- presumably mask and value; the consumer is outside this
 * chunk). It contains no mmTCP_CHAN_STEER_* or mmPA_SC_RASTER_CONFIG rows.
 */
417 static const u32 golden_settings_fiji_a10[] =
418 {
419         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
420         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
421         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
422         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
423         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
424         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
425         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
426         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
427         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
428         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
429         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
430 };
431
/*
 * Fiji MGCG/CGCG init table: rows of three u32 values (register, then two
 * constants -- presumably mask and value; the consumer is outside this
 * chunk).
 *
 * The clock-control section (mmRLC_CGTT_MGCG_OVERRIDE through
 * mmTD_CGTT_CTRL) and the trailing four rows match tonga_mgcg_cgcg_init.
 * Unlike the Tonga table, no per-CU mmCGTS_CU* rows follow the second
 * mmGRBM_GFX_INDEX row.
 * NOTE(review): row order is presumably significant; do not reorder.
 */
432 static const u32 fiji_mgcg_cgcg_init[] =
433 {
434         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
435         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
436         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
441         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
443         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
445         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
452         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
453         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
455         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
456         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
458         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
459         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
460         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
461         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
462         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
463         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
464         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
465         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
466         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
467         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
468         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
469 };
470
/*
 * Iceland "golden settings" table: rows of three u32 values (register, then
 * two constants -- presumably mask and value; the consumer is outside this
 * chunk). It includes an mmDB_DEBUG3 row, which none of the other golden
 * tables visible in this chunk has, and no mmVGT_RESET_DEBUG row.
 */
471 static const u32 golden_settings_iceland_a11[] =
472 {
473         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
474         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
475         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
476         mmGB_GPU_ID, 0x0000000f, 0x00000000,
477         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
478         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
479         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
480         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
481         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
482         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
483         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
484         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
485         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
486         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
487         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
488         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
489 };
490
/*
 * Iceland common golden table: rows of three u32 values (register, then two
 * constants -- presumably mask and value; the consumer is outside this
 * chunk). Every row uses 0xffffffff as the second value. The
 * mmGB_ADDR_CONFIG entry (0x22010001) has the same value as
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN.
 */
491 static const u32 iceland_golden_common_all[] =
492 {
493         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
494         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
495         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
496         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
497         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
498         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
499         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
500         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
501 };
502
/*
 * Iceland MGCG/CGCG init table: rows of three u32 values (register, then two
 * constants -- presumably mask and value; the consumer is outside this
 * chunk). Every row uses 0xffffffff as the second value.
 *
 * Differences from tonga_mgcg_cgcg_init visible in this file:
 *   - mmCGTT_CP/CPC/CPF_CLK_CTRL use 0xc0000100 and mmCGTT_TCI_CLK_CTRL uses
 *     0xff000100;
 *   - per-CU mmCGTS_CU* rows cover CU0..CU5 only, and the CU0/CU4 TA_SQC rows
 *     use 0x0f840f87;
 *   - there is no trailing mmCP_MEM_SLP_CNTL row.
 * NOTE(review): row order is presumably significant (mmGRBM_GFX_INDEX is
 * repeated before the per-CU section); do not reorder or deduplicate.
 */
503 static const u32 iceland_mgcg_cgcg_init[] =
504 {
505         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
506         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
507         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
508         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
509         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
510         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
511         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
512         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
513         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
514         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
515         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
516         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
518         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
519         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
520         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
521         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
522         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
523         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
524         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
525         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
526         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
527         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
528         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
529         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
530         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
531         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
532         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
533         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
534         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
535         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
536         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
537         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
538         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
539         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
540         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
541         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
542         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
543         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
544         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
545         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
546         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
547         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
548         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
549         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
550         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
551         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
552         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
553         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
554         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
555         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
556         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
557         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
558         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
559         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
560         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
561         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
562         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
563         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
564         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
565         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
566         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
567         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
568         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
569 };
570
/*
 * Carrizo ("cz") golden settings table: rows of three u32 values (register,
 * then two constants -- presumably mask and value; the consumer is outside
 * this chunk).
 * NOTE(review): the mmTCP_ADDR_CONFIG row pairs 0x0000000f with 0x000000f3,
 * whereas the other a11/a10 tables in this file use 0x000003ff for that
 * register. Whether the upper value bits take effect depends on how the
 * consumer combines the two constants -- confirm before changing.
 */
571 static const u32 cz_golden_settings_a11[] =
572 {
573         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
574         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
575         mmGB_GPU_ID, 0x0000000f, 0x00000000,
576         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
577         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
578         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
579         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
580         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
581         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
582         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
583         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
584         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
585 };
586
/*
 * Carrizo ("cz") common golden table: rows of three u32 values (register,
 * then two constants -- presumably mask and value; the consumer is outside
 * this chunk). Every row uses 0xffffffff as the second value. The contents
 * are identical to iceland_golden_common_all; the mmGB_ADDR_CONFIG entry
 * (0x22010001) has the same value as CARRIZO_GB_ADDR_CONFIG_GOLDEN.
 */
587 static const u32 cz_golden_common_all[] =
588 {
589         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
590         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
591         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
592         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
593         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
594         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
595         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
596         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
597 };
598
599 static const u32 cz_mgcg_cgcg_init[] =
600 {
601         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
602         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
603         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
606         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
608         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
609         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
610         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
611         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
612         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
615         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
616         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
617         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
619         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
620         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
621         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
622         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
623         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
624         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
625         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
626         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
627         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
628         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
629         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
630         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
631         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
632         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
633         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
634         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
635         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
636         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
637         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
638         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
639         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
640         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
641         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
642         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
643         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
644         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
645         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
646         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
647         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
648         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
649         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
650         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
651         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
652         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
653         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
654         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
655         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
656         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
657         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
658         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
659         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
660         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
661         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
662         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
663         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
664         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
665         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
666         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
667         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
668         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
669         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
670         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
671         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
672         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
673         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
674         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
675         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
676 };
677
678 static const u32 stoney_golden_settings_a11[] =
679 {
680         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
681         mmGB_GPU_ID, 0x0000000f, 0x00000000,
682         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
683         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
684         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
685         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
686         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
687         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
688         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
689         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
690 };
691
692 static const u32 stoney_golden_common_all[] =
693 {
694         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
695         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
696         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
697         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
698         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
699         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
700         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
701         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
702 };
703
704 static const u32 stoney_mgcg_cgcg_init[] =
705 {
706         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
707         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
708         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
709         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
710         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
711 };
712
713
/*
 * Human-readable descriptions of SQ EDC error sources.  Entries are listed
 * with explicit indices 0..6 so the position of each string is visible.
 */
static const char * const sq_edc_source_names[] = {
	[0] = "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	[1] = "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	[2] = "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	[3] = "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	[4] = "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	[5] = "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	[6] = "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
723
724 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
725 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
726 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
727 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
728 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
729 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
730 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
731 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
732
733 #define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
734 #define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L
735
736 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
737 {
738         uint32_t data;
739
740         switch (adev->asic_type) {
741         case CHIP_TOPAZ:
742                 amdgpu_device_program_register_sequence(adev,
743                                                         iceland_mgcg_cgcg_init,
744                                                         ARRAY_SIZE(iceland_mgcg_cgcg_init));
745                 amdgpu_device_program_register_sequence(adev,
746                                                         golden_settings_iceland_a11,
747                                                         ARRAY_SIZE(golden_settings_iceland_a11));
748                 amdgpu_device_program_register_sequence(adev,
749                                                         iceland_golden_common_all,
750                                                         ARRAY_SIZE(iceland_golden_common_all));
751                 break;
752         case CHIP_FIJI:
753                 amdgpu_device_program_register_sequence(adev,
754                                                         fiji_mgcg_cgcg_init,
755                                                         ARRAY_SIZE(fiji_mgcg_cgcg_init));
756                 amdgpu_device_program_register_sequence(adev,
757                                                         golden_settings_fiji_a10,
758                                                         ARRAY_SIZE(golden_settings_fiji_a10));
759                 amdgpu_device_program_register_sequence(adev,
760                                                         fiji_golden_common_all,
761                                                         ARRAY_SIZE(fiji_golden_common_all));
762                 break;
763
764         case CHIP_TONGA:
765                 amdgpu_device_program_register_sequence(adev,
766                                                         tonga_mgcg_cgcg_init,
767                                                         ARRAY_SIZE(tonga_mgcg_cgcg_init));
768                 amdgpu_device_program_register_sequence(adev,
769                                                         golden_settings_tonga_a11,
770                                                         ARRAY_SIZE(golden_settings_tonga_a11));
771                 amdgpu_device_program_register_sequence(adev,
772                                                         tonga_golden_common_all,
773                                                         ARRAY_SIZE(tonga_golden_common_all));
774                 break;
775         case CHIP_VEGAM:
776                 amdgpu_device_program_register_sequence(adev,
777                                                         golden_settings_vegam_a11,
778                                                         ARRAY_SIZE(golden_settings_vegam_a11));
779                 amdgpu_device_program_register_sequence(adev,
780                                                         vegam_golden_common_all,
781                                                         ARRAY_SIZE(vegam_golden_common_all));
782                 break;
783         case CHIP_POLARIS11:
784         case CHIP_POLARIS12:
785                 amdgpu_device_program_register_sequence(adev,
786                                                         golden_settings_polaris11_a11,
787                                                         ARRAY_SIZE(golden_settings_polaris11_a11));
788                 amdgpu_device_program_register_sequence(adev,
789                                                         polaris11_golden_common_all,
790                                                         ARRAY_SIZE(polaris11_golden_common_all));
791                 break;
792         case CHIP_POLARIS10:
793                 amdgpu_device_program_register_sequence(adev,
794                                                         golden_settings_polaris10_a11,
795                                                         ARRAY_SIZE(golden_settings_polaris10_a11));
796                 amdgpu_device_program_register_sequence(adev,
797                                                         polaris10_golden_common_all,
798                                                         ARRAY_SIZE(polaris10_golden_common_all));
799                 data = RREG32_SMC(ixCG_ACLK_CNTL);
800                 data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
801                 data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
802                 WREG32_SMC(ixCG_ACLK_CNTL, data);
803                 if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
804                     ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
805                      (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
806                      (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
807                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
808                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
809                 }
810                 break;
811         case CHIP_CARRIZO:
812                 amdgpu_device_program_register_sequence(adev,
813                                                         cz_mgcg_cgcg_init,
814                                                         ARRAY_SIZE(cz_mgcg_cgcg_init));
815                 amdgpu_device_program_register_sequence(adev,
816                                                         cz_golden_settings_a11,
817                                                         ARRAY_SIZE(cz_golden_settings_a11));
818                 amdgpu_device_program_register_sequence(adev,
819                                                         cz_golden_common_all,
820                                                         ARRAY_SIZE(cz_golden_common_all));
821                 break;
822         case CHIP_STONEY:
823                 amdgpu_device_program_register_sequence(adev,
824                                                         stoney_mgcg_cgcg_init,
825                                                         ARRAY_SIZE(stoney_mgcg_cgcg_init));
826                 amdgpu_device_program_register_sequence(adev,
827                                                         stoney_golden_settings_a11,
828                                                         ARRAY_SIZE(stoney_golden_settings_a11));
829                 amdgpu_device_program_register_sequence(adev,
830                                                         stoney_golden_common_all,
831                                                         ARRAY_SIZE(stoney_golden_common_all));
832                 break;
833         default:
834                 break;
835         }
836 }
837
838 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
839 {
840         adev->gfx.scratch.num_reg = 8;
841         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
842         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
843 }
844
845 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
846 {
847         struct amdgpu_device *adev = ring->adev;
848         uint32_t scratch;
849         uint32_t tmp = 0;
850         unsigned i;
851         int r;
852
853         r = amdgpu_gfx_scratch_get(adev, &scratch);
854         if (r)
855                 return r;
856
857         WREG32(scratch, 0xCAFEDEAD);
858         r = amdgpu_ring_alloc(ring, 3);
859         if (r)
860                 goto error_free_scratch;
861
862         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
863         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
864         amdgpu_ring_write(ring, 0xDEADBEEF);
865         amdgpu_ring_commit(ring);
866
867         for (i = 0; i < adev->usec_timeout; i++) {
868                 tmp = RREG32(scratch);
869                 if (tmp == 0xDEADBEEF)
870                         break;
871                 udelay(1);
872         }
873
874         if (i >= adev->usec_timeout)
875                 r = -ETIMEDOUT;
876
877 error_free_scratch:
878         amdgpu_gfx_scratch_free(adev, scratch);
879         return r;
880 }
881
882 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
883 {
884         struct amdgpu_device *adev = ring->adev;
885         struct amdgpu_ib ib;
886         struct dma_fence *f = NULL;
887
888         unsigned int index;
889         uint64_t gpu_addr;
890         uint32_t tmp;
891         long r;
892
893         r = amdgpu_device_wb_get(adev, &index);
894         if (r)
895                 return r;
896
897         gpu_addr = adev->wb.gpu_addr + (index * 4);
898         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
899         memset(&ib, 0, sizeof(ib));
900         r = amdgpu_ib_get(adev, NULL, 16,
901                                         AMDGPU_IB_POOL_DIRECT, &ib);
902         if (r)
903                 goto err1;
904
905         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
906         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
907         ib.ptr[2] = lower_32_bits(gpu_addr);
908         ib.ptr[3] = upper_32_bits(gpu_addr);
909         ib.ptr[4] = 0xDEADBEEF;
910         ib.length_dw = 5;
911
912         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
913         if (r)
914                 goto err2;
915
916         r = dma_fence_wait_timeout(f, false, timeout);
917         if (r == 0) {
918                 r = -ETIMEDOUT;
919                 goto err2;
920         } else if (r < 0) {
921                 goto err2;
922         }
923
924         tmp = adev->wb.wb[index];
925         if (tmp == 0xDEADBEEF)
926                 r = 0;
927         else
928                 r = -EINVAL;
929
930 err2:
931         amdgpu_ib_free(adev, &ib, NULL);
932         dma_fence_put(f);
933 err1:
934         amdgpu_device_wb_free(adev, index);
935         return r;
936 }
937
938
939 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
940 {
941         release_firmware(adev->gfx.pfp_fw);
942         adev->gfx.pfp_fw = NULL;
943         release_firmware(adev->gfx.me_fw);
944         adev->gfx.me_fw = NULL;
945         release_firmware(adev->gfx.ce_fw);
946         adev->gfx.ce_fw = NULL;
947         release_firmware(adev->gfx.rlc_fw);
948         adev->gfx.rlc_fw = NULL;
949         release_firmware(adev->gfx.mec_fw);
950         adev->gfx.mec_fw = NULL;
951         if ((adev->asic_type != CHIP_STONEY) &&
952             (adev->asic_type != CHIP_TOPAZ))
953                 release_firmware(adev->gfx.mec2_fw);
954         adev->gfx.mec2_fw = NULL;
955
956         kfree(adev->gfx.rlc.register_list_format);
957 }
958
959 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
960 {
961         const char *chip_name;
962         char fw_name[30];
963         int err;
964         struct amdgpu_firmware_info *info = NULL;
965         const struct common_firmware_header *header = NULL;
966         const struct gfx_firmware_header_v1_0 *cp_hdr;
967         const struct rlc_firmware_header_v2_0 *rlc_hdr;
968         unsigned int *tmp = NULL, i;
969
970         DRM_DEBUG("\n");
971
972         switch (adev->asic_type) {
973         case CHIP_TOPAZ:
974                 chip_name = "topaz";
975                 break;
976         case CHIP_TONGA:
977                 chip_name = "tonga";
978                 break;
979         case CHIP_CARRIZO:
980                 chip_name = "carrizo";
981                 break;
982         case CHIP_FIJI:
983                 chip_name = "fiji";
984                 break;
985         case CHIP_STONEY:
986                 chip_name = "stoney";
987                 break;
988         case CHIP_POLARIS10:
989                 chip_name = "polaris10";
990                 break;
991         case CHIP_POLARIS11:
992                 chip_name = "polaris11";
993                 break;
994         case CHIP_POLARIS12:
995                 chip_name = "polaris12";
996                 break;
997         case CHIP_VEGAM:
998                 chip_name = "vegam";
999                 break;
1000         default:
1001                 BUG();
1002         }
1003
1004         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1005                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
1006                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1007                 if (err == -ENOENT) {
1008                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1009                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1010                 }
1011         } else {
1012                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1013                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1014         }
1015         if (err)
1016                 goto out;
1017         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1018         if (err)
1019                 goto out;
1020         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1021         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1022         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1023
1024         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1025                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1026                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1027                 if (err == -ENOENT) {
1028                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1029                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1030                 }
1031         } else {
1032                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1033                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1034         }
1035         if (err)
1036                 goto out;
1037         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1038         if (err)
1039                 goto out;
1040         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1041         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1042
1043         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1044
1045         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1046                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1047                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1048                 if (err == -ENOENT) {
1049                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1050                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1051                 }
1052         } else {
1053                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1054                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1055         }
1056         if (err)
1057                 goto out;
1058         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1059         if (err)
1060                 goto out;
1061         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1062         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1063         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1064
1065         /*
1066          * Support for MCBP/Virtualization in combination with chained IBs is
1067          * formal released on feature version #46
1068          */
1069         if (adev->gfx.ce_feature_version >= 46 &&
1070             adev->gfx.pfp_feature_version >= 46) {
1071                 adev->virt.chained_ib_support = true;
1072                 DRM_INFO("Chained IB support enabled!\n");
1073         } else
1074                 adev->virt.chained_ib_support = false;
1075
1076         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1077         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1078         if (err)
1079                 goto out;
1080         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1081         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1082         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1083         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1084
1085         adev->gfx.rlc.save_and_restore_offset =
1086                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1087         adev->gfx.rlc.clear_state_descriptor_offset =
1088                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1089         adev->gfx.rlc.avail_scratch_ram_locations =
1090                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1091         adev->gfx.rlc.reg_restore_list_size =
1092                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1093         adev->gfx.rlc.reg_list_format_start =
1094                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1095         adev->gfx.rlc.reg_list_format_separate_start =
1096                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1097         adev->gfx.rlc.starting_offsets_start =
1098                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1099         adev->gfx.rlc.reg_list_format_size_bytes =
1100                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1101         adev->gfx.rlc.reg_list_size_bytes =
1102                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1103
1104         adev->gfx.rlc.register_list_format =
1105                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1106                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1107
1108         if (!adev->gfx.rlc.register_list_format) {
1109                 err = -ENOMEM;
1110                 goto out;
1111         }
1112
1113         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1114                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1115         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1116                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1117
1118         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1119
1120         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1121                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1122         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1123                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1124
1125         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1126                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1127                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1128                 if (err == -ENOENT) {
1129                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1130                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1131                 }
1132         } else {
1133                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1134                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1135         }
1136         if (err)
1137                 goto out;
1138         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1139         if (err)
1140                 goto out;
1141         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1142         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1143         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1144
1145         if ((adev->asic_type != CHIP_STONEY) &&
1146             (adev->asic_type != CHIP_TOPAZ)) {
1147                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1148                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1149                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1150                         if (err == -ENOENT) {
1151                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1152                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1153                         }
1154                 } else {
1155                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1156                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1157                 }
1158                 if (!err) {
1159                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1160                         if (err)
1161                                 goto out;
1162                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1163                                 adev->gfx.mec2_fw->data;
1164                         adev->gfx.mec2_fw_version =
1165                                 le32_to_cpu(cp_hdr->header.ucode_version);
1166                         adev->gfx.mec2_feature_version =
1167                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1168                 } else {
1169                         err = 0;
1170                         adev->gfx.mec2_fw = NULL;
1171                 }
1172         }
1173
1174         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1175         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1176         info->fw = adev->gfx.pfp_fw;
1177         header = (const struct common_firmware_header *)info->fw->data;
1178         adev->firmware.fw_size +=
1179                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1180
1181         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1182         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1183         info->fw = adev->gfx.me_fw;
1184         header = (const struct common_firmware_header *)info->fw->data;
1185         adev->firmware.fw_size +=
1186                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1187
1188         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1189         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1190         info->fw = adev->gfx.ce_fw;
1191         header = (const struct common_firmware_header *)info->fw->data;
1192         adev->firmware.fw_size +=
1193                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1194
1195         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1196         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1197         info->fw = adev->gfx.rlc_fw;
1198         header = (const struct common_firmware_header *)info->fw->data;
1199         adev->firmware.fw_size +=
1200                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1201
1202         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1203         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1204         info->fw = adev->gfx.mec_fw;
1205         header = (const struct common_firmware_header *)info->fw->data;
1206         adev->firmware.fw_size +=
1207                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1208
1209         /* we need account JT in */
1210         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1211         adev->firmware.fw_size +=
1212                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1213
1214         if (amdgpu_sriov_vf(adev)) {
1215                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1216                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1217                 info->fw = adev->gfx.mec_fw;
1218                 adev->firmware.fw_size +=
1219                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1220         }
1221
1222         if (adev->gfx.mec2_fw) {
1223                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1224                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1225                 info->fw = adev->gfx.mec2_fw;
1226                 header = (const struct common_firmware_header *)info->fw->data;
1227                 adev->firmware.fw_size +=
1228                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1229         }
1230
1231 out:
1232         if (err) {
1233                 dev_err(adev->dev,
1234                         "gfx8: Failed to load firmware \"%s\"\n",
1235                         fw_name);
1236                 release_firmware(adev->gfx.pfp_fw);
1237                 adev->gfx.pfp_fw = NULL;
1238                 release_firmware(adev->gfx.me_fw);
1239                 adev->gfx.me_fw = NULL;
1240                 release_firmware(adev->gfx.ce_fw);
1241                 adev->gfx.ce_fw = NULL;
1242                 release_firmware(adev->gfx.rlc_fw);
1243                 adev->gfx.rlc_fw = NULL;
1244                 release_firmware(adev->gfx.mec_fw);
1245                 adev->gfx.mec_fw = NULL;
1246                 release_firmware(adev->gfx.mec2_fw);
1247                 adev->gfx.mec2_fw = NULL;
1248         }
1249         return err;
1250 }
1251
1252 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1253                                     volatile u32 *buffer)
1254 {
1255         u32 count = 0, i;
1256         const struct cs_section_def *sect = NULL;
1257         const struct cs_extent_def *ext = NULL;
1258
1259         if (adev->gfx.rlc.cs_data == NULL)
1260                 return;
1261         if (buffer == NULL)
1262                 return;
1263
1264         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1265         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1266
1267         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1268         buffer[count++] = cpu_to_le32(0x80000000);
1269         buffer[count++] = cpu_to_le32(0x80000000);
1270
1271         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1272                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1273                         if (sect->id == SECT_CONTEXT) {
1274                                 buffer[count++] =
1275                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1276                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1277                                                 PACKET3_SET_CONTEXT_REG_START);
1278                                 for (i = 0; i < ext->reg_count; i++)
1279                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1280                         } else {
1281                                 return;
1282                         }
1283                 }
1284         }
1285
1286         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1287         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1288                         PACKET3_SET_CONTEXT_REG_START);
1289         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1290         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1291
1292         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1293         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1294
1295         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1296         buffer[count++] = cpu_to_le32(0);
1297 }
1298
1299 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1300 {
1301         if (adev->asic_type == CHIP_CARRIZO)
1302                 return 5;
1303         else
1304                 return 4;
1305 }
1306
1307 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1308 {
1309         const struct cs_section_def *cs_data;
1310         int r;
1311
1312         adev->gfx.rlc.cs_data = vi_cs_data;
1313
1314         cs_data = adev->gfx.rlc.cs_data;
1315
1316         if (cs_data) {
1317                 /* init clear state block */
1318                 r = amdgpu_gfx_rlc_init_csb(adev);
1319                 if (r)
1320                         return r;
1321         }
1322
1323         if ((adev->asic_type == CHIP_CARRIZO) ||
1324             (adev->asic_type == CHIP_STONEY)) {
1325                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1326                 r = amdgpu_gfx_rlc_init_cpt(adev);
1327                 if (r)
1328                         return r;
1329         }
1330
1331         /* init spm vmid with 0xf */
1332         if (adev->gfx.rlc.funcs->update_spm_vmid)
1333                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1334
1335         return 0;
1336 }
1337
1338 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1339 {
1340         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1341 }
1342
1343 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1344 {
1345         int r;
1346         u32 *hpd;
1347         size_t mec_hpd_size;
1348
1349         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1350
1351         /* take ownership of the relevant compute queues */
1352         amdgpu_gfx_compute_queue_acquire(adev);
1353
1354         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1355         if (mec_hpd_size) {
1356                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1357                                               AMDGPU_GEM_DOMAIN_VRAM,
1358                                               &adev->gfx.mec.hpd_eop_obj,
1359                                               &adev->gfx.mec.hpd_eop_gpu_addr,
1360                                               (void **)&hpd);
1361                 if (r) {
1362                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1363                         return r;
1364                 }
1365
1366                 memset(hpd, 0, mec_hpd_size);
1367
1368                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1369                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1370         }
1371
1372         return 0;
1373 }
1374
1375 static const u32 vgpr_init_compute_shader[] =
1376 {
1377         0x7e000209, 0x7e020208,
1378         0x7e040207, 0x7e060206,
1379         0x7e080205, 0x7e0a0204,
1380         0x7e0c0203, 0x7e0e0202,
1381         0x7e100201, 0x7e120200,
1382         0x7e140209, 0x7e160208,
1383         0x7e180207, 0x7e1a0206,
1384         0x7e1c0205, 0x7e1e0204,
1385         0x7e200203, 0x7e220202,
1386         0x7e240201, 0x7e260200,
1387         0x7e280209, 0x7e2a0208,
1388         0x7e2c0207, 0x7e2e0206,
1389         0x7e300205, 0x7e320204,
1390         0x7e340203, 0x7e360202,
1391         0x7e380201, 0x7e3a0200,
1392         0x7e3c0209, 0x7e3e0208,
1393         0x7e400207, 0x7e420206,
1394         0x7e440205, 0x7e460204,
1395         0x7e480203, 0x7e4a0202,
1396         0x7e4c0201, 0x7e4e0200,
1397         0x7e500209, 0x7e520208,
1398         0x7e540207, 0x7e560206,
1399         0x7e580205, 0x7e5a0204,
1400         0x7e5c0203, 0x7e5e0202,
1401         0x7e600201, 0x7e620200,
1402         0x7e640209, 0x7e660208,
1403         0x7e680207, 0x7e6a0206,
1404         0x7e6c0205, 0x7e6e0204,
1405         0x7e700203, 0x7e720202,
1406         0x7e740201, 0x7e760200,
1407         0x7e780209, 0x7e7a0208,
1408         0x7e7c0207, 0x7e7e0206,
1409         0xbf8a0000, 0xbf810000,
1410 };
1411
1412 static const u32 sgpr_init_compute_shader[] =
1413 {
1414         0xbe8a0100, 0xbe8c0102,
1415         0xbe8e0104, 0xbe900106,
1416         0xbe920108, 0xbe940100,
1417         0xbe960102, 0xbe980104,
1418         0xbe9a0106, 0xbe9c0108,
1419         0xbe9e0100, 0xbea00102,
1420         0xbea20104, 0xbea40106,
1421         0xbea60108, 0xbea80100,
1422         0xbeaa0102, 0xbeac0104,
1423         0xbeae0106, 0xbeb00108,
1424         0xbeb20100, 0xbeb40102,
1425         0xbeb60104, 0xbeb80106,
1426         0xbeba0108, 0xbebc0100,
1427         0xbebe0102, 0xbec00104,
1428         0xbec20106, 0xbec40108,
1429         0xbec60100, 0xbec80102,
1430         0xbee60004, 0xbee70005,
1431         0xbeea0006, 0xbeeb0007,
1432         0xbee80008, 0xbee90009,
1433         0xbefc0000, 0xbf8a0000,
1434         0xbf810000, 0x00000000,
1435 };
1436
1437 static const u32 vgpr_init_regs[] =
1438 {
1439         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1440         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1441         mmCOMPUTE_NUM_THREAD_X, 256*4,
1442         mmCOMPUTE_NUM_THREAD_Y, 1,
1443         mmCOMPUTE_NUM_THREAD_Z, 1,
1444         mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1445         mmCOMPUTE_PGM_RSRC2, 20,
1446         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1447         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1448         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1449         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1450         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1451         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1452         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1453         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1454         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1455         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1456 };
1457
1458 static const u32 sgpr1_init_regs[] =
1459 {
1460         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1461         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1462         mmCOMPUTE_NUM_THREAD_X, 256*5,
1463         mmCOMPUTE_NUM_THREAD_Y, 1,
1464         mmCOMPUTE_NUM_THREAD_Z, 1,
1465         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1466         mmCOMPUTE_PGM_RSRC2, 20,
1467         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1468         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1469         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1470         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1471         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1472         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1473         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1474         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1475         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1476         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1477 };
1478
1479 static const u32 sgpr2_init_regs[] =
1480 {
1481         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1482         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1483         mmCOMPUTE_NUM_THREAD_X, 256*5,
1484         mmCOMPUTE_NUM_THREAD_Y, 1,
1485         mmCOMPUTE_NUM_THREAD_Z, 1,
1486         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1487         mmCOMPUTE_PGM_RSRC2, 20,
1488         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1489         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1490         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1491         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1492         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1493         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1494         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1495         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1496         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1497         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1498 };
1499
1500 static const u32 sec_ded_counter_registers[] =
1501 {
1502         mmCPC_EDC_ATC_CNT,
1503         mmCPC_EDC_SCRATCH_CNT,
1504         mmCPC_EDC_UCODE_CNT,
1505         mmCPF_EDC_ATC_CNT,
1506         mmCPF_EDC_ROQ_CNT,
1507         mmCPF_EDC_TAG_CNT,
1508         mmCPG_EDC_ATC_CNT,
1509         mmCPG_EDC_DMA_CNT,
1510         mmCPG_EDC_TAG_CNT,
1511         mmDC_EDC_CSINVOC_CNT,
1512         mmDC_EDC_RESTORE_CNT,
1513         mmDC_EDC_STATE_CNT,
1514         mmGDS_EDC_CNT,
1515         mmGDS_EDC_GRBM_CNT,
1516         mmGDS_EDC_OA_DED,
1517         mmSPI_EDC_CNT,
1518         mmSQC_ATC_EDC_GATCL1_CNT,
1519         mmSQC_EDC_CNT,
1520         mmSQ_EDC_DED_CNT,
1521         mmSQ_EDC_INFO,
1522         mmSQ_EDC_SEC_CNT,
1523         mmTCC_EDC_CNT,
1524         mmTCP_ATC_EDC_GATCL1_CNT,
1525         mmTCP_EDC_CNT,
1526         mmTD_EDC_CNT
1527 };
1528
1529 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1530 {
1531         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1532         struct amdgpu_ib ib;
1533         struct dma_fence *f = NULL;
1534         int r, i;
1535         u32 tmp;
1536         unsigned total_size, vgpr_offset, sgpr_offset;
1537         u64 gpu_addr;
1538
1539         /* only supported on CZ */
1540         if (adev->asic_type != CHIP_CARRIZO)
1541                 return 0;
1542
1543         /* bail if the compute ring is not ready */
1544         if (!ring->sched.ready)
1545                 return 0;
1546
1547         tmp = RREG32(mmGB_EDC_MODE);
1548         WREG32(mmGB_EDC_MODE, 0);
1549
1550         total_size =
1551                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1552         total_size +=
1553                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1554         total_size +=
1555                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1556         total_size = ALIGN(total_size, 256);
1557         vgpr_offset = total_size;
1558         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1559         sgpr_offset = total_size;
1560         total_size += sizeof(sgpr_init_compute_shader);
1561
1562         /* allocate an indirect buffer to put the commands in */
1563         memset(&ib, 0, sizeof(ib));
1564         r = amdgpu_ib_get(adev, NULL, total_size,
1565                                         AMDGPU_IB_POOL_DIRECT, &ib);
1566         if (r) {
1567                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1568                 return r;
1569         }
1570
1571         /* load the compute shaders */
1572         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1573                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1574
1575         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1576                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1577
1578         /* init the ib length to 0 */
1579         ib.length_dw = 0;
1580
1581         /* VGPR */
1582         /* write the register state for the compute dispatch */
1583         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1584                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1585                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1586                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1587         }
1588         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1589         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1590         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1591         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1592         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1593         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1594
1595         /* write dispatch packet */
1596         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1597         ib.ptr[ib.length_dw++] = 8; /* x */
1598         ib.ptr[ib.length_dw++] = 1; /* y */
1599         ib.ptr[ib.length_dw++] = 1; /* z */
1600         ib.ptr[ib.length_dw++] =
1601                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1602
1603         /* write CS partial flush packet */
1604         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1605         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1606
1607         /* SGPR1 */
1608         /* write the register state for the compute dispatch */
1609         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1610                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1611                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1612                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1613         }
1614         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1615         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1616         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1617         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1618         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1619         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1620
1621         /* write dispatch packet */
1622         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1623         ib.ptr[ib.length_dw++] = 8; /* x */
1624         ib.ptr[ib.length_dw++] = 1; /* y */
1625         ib.ptr[ib.length_dw++] = 1; /* z */
1626         ib.ptr[ib.length_dw++] =
1627                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1628
1629         /* write CS partial flush packet */
1630         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1631         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1632
1633         /* SGPR2 */
1634         /* write the register state for the compute dispatch */
1635         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1636                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1637                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1638                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1639         }
1640         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1641         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1642         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1643         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1644         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1645         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1646
1647         /* write dispatch packet */
1648         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1649         ib.ptr[ib.length_dw++] = 8; /* x */
1650         ib.ptr[ib.length_dw++] = 1; /* y */
1651         ib.ptr[ib.length_dw++] = 1; /* z */
1652         ib.ptr[ib.length_dw++] =
1653                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1654
1655         /* write CS partial flush packet */
1656         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1657         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1658
1659         /* shedule the ib on the ring */
1660         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1661         if (r) {
1662                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1663                 goto fail;
1664         }
1665
1666         /* wait for the GPU to finish processing the IB */
1667         r = dma_fence_wait(f, false);
1668         if (r) {
1669                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1670                 goto fail;
1671         }
1672
1673         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1674         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1675         WREG32(mmGB_EDC_MODE, tmp);
1676
1677         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1678         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1679         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1680
1681
1682         /* read back registers to clear the counters */
1683         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1684                 RREG32(sec_ded_counter_registers[i]);
1685
1686 fail:
1687         amdgpu_ib_free(adev, &ib, NULL);
1688         dma_fence_put(f);
1689
1690         return r;
1691 }
1692
1693 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1694 {
1695         u32 gb_addr_config;
1696         u32 mc_arb_ramcfg;
1697         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1698         u32 tmp;
1699         int ret;
1700
1701         switch (adev->asic_type) {
1702         case CHIP_TOPAZ:
1703                 adev->gfx.config.max_shader_engines = 1;
1704                 adev->gfx.config.max_tile_pipes = 2;
1705                 adev->gfx.config.max_cu_per_sh = 6;
1706                 adev->gfx.config.max_sh_per_se = 1;
1707                 adev->gfx.config.max_backends_per_se = 2;
1708                 adev->gfx.config.max_texture_channel_caches = 2;
1709                 adev->gfx.config.max_gprs = 256;
1710                 adev->gfx.config.max_gs_threads = 32;
1711                 adev->gfx.config.max_hw_contexts = 8;
1712
1713                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1714                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1715                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1716                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1717                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1718                 break;
1719         case CHIP_FIJI:
1720                 adev->gfx.config.max_shader_engines = 4;
1721                 adev->gfx.config.max_tile_pipes = 16;
1722                 adev->gfx.config.max_cu_per_sh = 16;
1723                 adev->gfx.config.max_sh_per_se = 1;
1724                 adev->gfx.config.max_backends_per_se = 4;
1725                 adev->gfx.config.max_texture_channel_caches = 16;
1726                 adev->gfx.config.max_gprs = 256;
1727                 adev->gfx.config.max_gs_threads = 32;
1728                 adev->gfx.config.max_hw_contexts = 8;
1729
1730                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1731                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1732                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1733                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1734                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1735                 break;
1736         case CHIP_POLARIS11:
1737         case CHIP_POLARIS12:
1738                 ret = amdgpu_atombios_get_gfx_info(adev);
1739                 if (ret)
1740                         return ret;
1741                 adev->gfx.config.max_gprs = 256;
1742                 adev->gfx.config.max_gs_threads = 32;
1743                 adev->gfx.config.max_hw_contexts = 8;
1744
1745                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1746                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1747                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1748                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1749                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1750                 break;
1751         case CHIP_POLARIS10:
1752         case CHIP_VEGAM:
1753                 ret = amdgpu_atombios_get_gfx_info(adev);
1754                 if (ret)
1755                         return ret;
1756                 adev->gfx.config.max_gprs = 256;
1757                 adev->gfx.config.max_gs_threads = 32;
1758                 adev->gfx.config.max_hw_contexts = 8;
1759
1760                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1761                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1762                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1763                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1764                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1765                 break;
1766         case CHIP_TONGA:
1767                 adev->gfx.config.max_shader_engines = 4;
1768                 adev->gfx.config.max_tile_pipes = 8;
1769                 adev->gfx.config.max_cu_per_sh = 8;
1770                 adev->gfx.config.max_sh_per_se = 1;
1771                 adev->gfx.config.max_backends_per_se = 2;
1772                 adev->gfx.config.max_texture_channel_caches = 8;
1773                 adev->gfx.config.max_gprs = 256;
1774                 adev->gfx.config.max_gs_threads = 32;
1775                 adev->gfx.config.max_hw_contexts = 8;
1776
1777                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1778                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1779                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1780                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1781                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1782                 break;
1783         case CHIP_CARRIZO:
1784                 adev->gfx.config.max_shader_engines = 1;
1785                 adev->gfx.config.max_tile_pipes = 2;
1786                 adev->gfx.config.max_sh_per_se = 1;
1787                 adev->gfx.config.max_backends_per_se = 2;
1788                 adev->gfx.config.max_cu_per_sh = 8;
1789                 adev->gfx.config.max_texture_channel_caches = 2;
1790                 adev->gfx.config.max_gprs = 256;
1791                 adev->gfx.config.max_gs_threads = 32;
1792                 adev->gfx.config.max_hw_contexts = 8;
1793
1794                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1795                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1796                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1797                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1798                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1799                 break;
1800         case CHIP_STONEY:
1801                 adev->gfx.config.max_shader_engines = 1;
1802                 adev->gfx.config.max_tile_pipes = 2;
1803                 adev->gfx.config.max_sh_per_se = 1;
1804                 adev->gfx.config.max_backends_per_se = 1;
1805                 adev->gfx.config.max_cu_per_sh = 3;
1806                 adev->gfx.config.max_texture_channel_caches = 2;
1807                 adev->gfx.config.max_gprs = 256;
1808                 adev->gfx.config.max_gs_threads = 16;
1809                 adev->gfx.config.max_hw_contexts = 8;
1810
1811                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1816                 break;
1817         default:
1818                 adev->gfx.config.max_shader_engines = 2;
1819                 adev->gfx.config.max_tile_pipes = 4;
1820                 adev->gfx.config.max_cu_per_sh = 2;
1821                 adev->gfx.config.max_sh_per_se = 1;
1822                 adev->gfx.config.max_backends_per_se = 2;
1823                 adev->gfx.config.max_texture_channel_caches = 4;
1824                 adev->gfx.config.max_gprs = 256;
1825                 adev->gfx.config.max_gs_threads = 32;
1826                 adev->gfx.config.max_hw_contexts = 8;
1827
1828                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1829                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1830                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1831                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1832                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1833                 break;
1834         }
1835
1836         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1837         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1838
1839         adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1840                                 MC_ARB_RAMCFG, NOOFBANK);
1841         adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1842                                 MC_ARB_RAMCFG, NOOFRANKS);
1843
1844         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1845         adev->gfx.config.mem_max_burst_length_bytes = 256;
1846         if (adev->flags & AMD_IS_APU) {
1847                 /* Get memory bank mapping mode. */
1848                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1849                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1850                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1851
1852                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1853                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1854                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1855
1856                 /* Validate settings in case only one DIMM installed. */
1857                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1858                         dimm00_addr_map = 0;
1859                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1860                         dimm01_addr_map = 0;
1861                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1862                         dimm10_addr_map = 0;
1863                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1864                         dimm11_addr_map = 0;
1865
1866                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1867                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1868                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1869                         adev->gfx.config.mem_row_size_in_kb = 2;
1870                 else
1871                         adev->gfx.config.mem_row_size_in_kb = 1;
1872         } else {
1873                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1874                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1875                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1876                         adev->gfx.config.mem_row_size_in_kb = 4;
1877         }
1878
1879         adev->gfx.config.shader_engine_tile_size = 32;
1880         adev->gfx.config.num_gpus = 1;
1881         adev->gfx.config.multi_gpu_tile_size = 64;
1882
1883         /* fix up row size */
1884         switch (adev->gfx.config.mem_row_size_in_kb) {
1885         case 1:
1886         default:
1887                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1888                 break;
1889         case 2:
1890                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1891                 break;
1892         case 4:
1893                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1894                 break;
1895         }
1896         adev->gfx.config.gb_addr_config = gb_addr_config;
1897
1898         return 0;
1899 }
1900
1901 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1902                                         int mec, int pipe, int queue)
1903 {
1904         int r;
1905         unsigned irq_type;
1906         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1907         unsigned int hw_prio;
1908
1909         ring = &adev->gfx.compute_ring[ring_id];
1910
1911         /* mec0 is me1 */
1912         ring->me = mec + 1;
1913         ring->pipe = pipe;
1914         ring->queue = queue;
1915
1916         ring->ring_obj = NULL;
1917         ring->use_doorbell = true;
1918         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1919         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1920                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1921         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1922
1923         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1924                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1925                 + ring->pipe;
1926
1927         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1928                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT;
1929         /* type-2 packets are deprecated on MEC, use type-3 instead */
1930         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1931                              hw_prio, NULL);
1932         if (r)
1933                 return r;
1934
1935
1936         return 0;
1937 }
1938
1939 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1940
1941 static int gfx_v8_0_sw_init(void *handle)
1942 {
1943         int i, j, k, r, ring_id;
1944         struct amdgpu_ring *ring;
1945         struct amdgpu_kiq *kiq;
1946         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1947
1948         switch (adev->asic_type) {
1949         case CHIP_TONGA:
1950         case CHIP_CARRIZO:
1951         case CHIP_FIJI:
1952         case CHIP_POLARIS10:
1953         case CHIP_POLARIS11:
1954         case CHIP_POLARIS12:
1955         case CHIP_VEGAM:
1956                 adev->gfx.mec.num_mec = 2;
1957                 break;
1958         case CHIP_TOPAZ:
1959         case CHIP_STONEY:
1960         default:
1961                 adev->gfx.mec.num_mec = 1;
1962                 break;
1963         }
1964
1965         adev->gfx.mec.num_pipe_per_mec = 4;
1966         adev->gfx.mec.num_queue_per_pipe = 8;
1967
1968         /* EOP Event */
1969         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1970         if (r)
1971                 return r;
1972
1973         /* Privileged reg */
1974         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1975                               &adev->gfx.priv_reg_irq);
1976         if (r)
1977                 return r;
1978
1979         /* Privileged inst */
1980         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1981                               &adev->gfx.priv_inst_irq);
1982         if (r)
1983                 return r;
1984
1985         /* Add CP EDC/ECC irq  */
1986         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1987                               &adev->gfx.cp_ecc_error_irq);
1988         if (r)
1989                 return r;
1990
1991         /* SQ interrupts. */
1992         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1993                               &adev->gfx.sq_irq);
1994         if (r) {
1995                 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1996                 return r;
1997         }
1998
1999         INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
2000
2001         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2002
2003         gfx_v8_0_scratch_init(adev);
2004
2005         r = gfx_v8_0_init_microcode(adev);
2006         if (r) {
2007                 DRM_ERROR("Failed to load gfx firmware!\n");
2008                 return r;
2009         }
2010
2011         r = adev->gfx.rlc.funcs->init(adev);
2012         if (r) {
2013                 DRM_ERROR("Failed to init rlc BOs!\n");
2014                 return r;
2015         }
2016
2017         r = gfx_v8_0_mec_init(adev);
2018         if (r) {
2019                 DRM_ERROR("Failed to init MEC BOs!\n");
2020                 return r;
2021         }
2022
2023         /* set up the gfx ring */
2024         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2025                 ring = &adev->gfx.gfx_ring[i];
2026                 ring->ring_obj = NULL;
2027                 sprintf(ring->name, "gfx");
2028                 /* no gfx doorbells on iceland */
2029                 if (adev->asic_type != CHIP_TOPAZ) {
2030                         ring->use_doorbell = true;
2031                         ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2032                 }
2033
2034                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2035                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2036                                      AMDGPU_RING_PRIO_DEFAULT, NULL);
2037                 if (r)
2038                         return r;
2039         }
2040
2041
2042         /* set up the compute queues - allocate horizontally across pipes */
2043         ring_id = 0;
2044         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2045                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2046                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2047                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2048                                         continue;
2049
2050                                 r = gfx_v8_0_compute_ring_init(adev,
2051                                                                 ring_id,
2052                                                                 i, k, j);
2053                                 if (r)
2054                                         return r;
2055
2056                                 ring_id++;
2057                         }
2058                 }
2059         }
2060
2061         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2062         if (r) {
2063                 DRM_ERROR("Failed to init KIQ BOs!\n");
2064                 return r;
2065         }
2066
2067         kiq = &adev->gfx.kiq;
2068         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2069         if (r)
2070                 return r;
2071
2072         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2073         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2074         if (r)
2075                 return r;
2076
2077         adev->gfx.ce_ram_size = 0x8000;
2078
2079         r = gfx_v8_0_gpu_early_init(adev);
2080         if (r)
2081                 return r;
2082
2083         return 0;
2084 }
2085
2086 static int gfx_v8_0_sw_fini(void *handle)
2087 {
2088         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2089         int i;
2090
2091         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2092                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2093         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2094                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2095
2096         amdgpu_gfx_mqd_sw_fini(adev);
2097         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2098         amdgpu_gfx_kiq_fini(adev);
2099
2100         gfx_v8_0_mec_fini(adev);
2101         amdgpu_gfx_rlc_fini(adev);
2102         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2103                                 &adev->gfx.rlc.clear_state_gpu_addr,
2104                                 (void **)&adev->gfx.rlc.cs_ptr);
2105         if ((adev->asic_type == CHIP_CARRIZO) ||
2106             (adev->asic_type == CHIP_STONEY)) {
2107                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2108                                 &adev->gfx.rlc.cp_table_gpu_addr,
2109                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2110         }
2111         gfx_v8_0_free_microcode(adev);
2112
2113         return 0;
2114 }
2115
2116 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2117 {
2118         uint32_t *modearray, *mod2array;
2119         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2120         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2121         u32 reg_offset;
2122
2123         modearray = adev->gfx.config.tile_mode_array;
2124         mod2array = adev->gfx.config.macrotile_mode_array;
2125
2126         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2127                 modearray[reg_offset] = 0;
2128
2129         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2130                 mod2array[reg_offset] = 0;
2131
2132         switch (adev->asic_type) {
2133         case CHIP_TOPAZ:
2134                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135                                 PIPE_CONFIG(ADDR_SURF_P2) |
2136                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2137                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2138                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2139                                 PIPE_CONFIG(ADDR_SURF_P2) |
2140                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2141                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2142                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2143                                 PIPE_CONFIG(ADDR_SURF_P2) |
2144                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2145                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2146                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2147                                 PIPE_CONFIG(ADDR_SURF_P2) |
2148                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2149                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2150                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2151                                 PIPE_CONFIG(ADDR_SURF_P2) |
2152                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2153                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2154                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2155                                 PIPE_CONFIG(ADDR_SURF_P2) |
2156                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2157                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2158                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2159                                 PIPE_CONFIG(ADDR_SURF_P2) |
2160                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2161                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2162                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2163                                 PIPE_CONFIG(ADDR_SURF_P2));
2164                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2165                                 PIPE_CONFIG(ADDR_SURF_P2) |
2166                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2167                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2168                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2169                                  PIPE_CONFIG(ADDR_SURF_P2) |
2170                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2171                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173                                  PIPE_CONFIG(ADDR_SURF_P2) |
2174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2176                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2177                                  PIPE_CONFIG(ADDR_SURF_P2) |
2178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2180                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2181                                  PIPE_CONFIG(ADDR_SURF_P2) |
2182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2185                                  PIPE_CONFIG(ADDR_SURF_P2) |
2186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2188                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2189                                  PIPE_CONFIG(ADDR_SURF_P2) |
2190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2192                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2193                                  PIPE_CONFIG(ADDR_SURF_P2) |
2194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2196                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2197                                  PIPE_CONFIG(ADDR_SURF_P2) |
2198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2200                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2201                                  PIPE_CONFIG(ADDR_SURF_P2) |
2202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2204                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2205                                  PIPE_CONFIG(ADDR_SURF_P2) |
2206                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2207                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2208                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2209                                  PIPE_CONFIG(ADDR_SURF_P2) |
2210                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2211                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2212                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2213                                  PIPE_CONFIG(ADDR_SURF_P2) |
2214                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2215                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2216                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2217                                  PIPE_CONFIG(ADDR_SURF_P2) |
2218                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2219                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2220                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2221                                  PIPE_CONFIG(ADDR_SURF_P2) |
2222                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2223                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2224                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2225                                  PIPE_CONFIG(ADDR_SURF_P2) |
2226                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2227                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2228                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229                                  PIPE_CONFIG(ADDR_SURF_P2) |
2230                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2231                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2232                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2233                                  PIPE_CONFIG(ADDR_SURF_P2) |
2234                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2235                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2236
2237                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2238                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2239                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240                                 NUM_BANKS(ADDR_SURF_8_BANK));
2241                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2242                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2243                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2244                                 NUM_BANKS(ADDR_SURF_8_BANK));
2245                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2246                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2247                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2248                                 NUM_BANKS(ADDR_SURF_8_BANK));
2249                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2251                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252                                 NUM_BANKS(ADDR_SURF_8_BANK));
2253                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2254                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2255                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2256                                 NUM_BANKS(ADDR_SURF_8_BANK));
2257                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2258                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2259                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2260                                 NUM_BANKS(ADDR_SURF_8_BANK));
2261                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264                                 NUM_BANKS(ADDR_SURF_8_BANK));
2265                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2266                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2267                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268                                 NUM_BANKS(ADDR_SURF_16_BANK));
2269                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2270                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2271                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2272                                 NUM_BANKS(ADDR_SURF_16_BANK));
2273                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2274                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2275                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2276                                  NUM_BANKS(ADDR_SURF_16_BANK));
2277                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2278                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2279                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2280                                  NUM_BANKS(ADDR_SURF_16_BANK));
2281                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2282                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2283                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2284                                  NUM_BANKS(ADDR_SURF_16_BANK));
2285                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2286                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2287                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288                                  NUM_BANKS(ADDR_SURF_16_BANK));
2289                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2290                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2291                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2292                                  NUM_BANKS(ADDR_SURF_8_BANK));
2293
2294                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2295                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2296                             reg_offset != 23)
2297                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2298
2299                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2300                         if (reg_offset != 7)
2301                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2302
2303                 break;
2304         case CHIP_FIJI:
2305         case CHIP_VEGAM:
2306                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2309                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2311                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2313                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2314                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2317                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2318                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2321                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2325                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2326                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2327                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2329                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2330                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2331                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2333                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2334                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2335                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2336                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2337                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2338                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2339                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2340                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2341                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2343                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2345                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2349                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2352                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2353                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2356                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2357                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2360                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2361                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2364                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2365                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2368                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2369                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2372                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2373                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2376                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2377                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2381                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2385                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2387                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2389                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2391                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2393                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2397                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2398                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2399                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2400                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2401                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2403                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2404                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2405                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2407                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2408                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2409                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2411                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2412                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2415                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2419                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2421                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2423                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2424                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2426                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2427                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2428
2429                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2431                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2432                                 NUM_BANKS(ADDR_SURF_8_BANK));
2433                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2435                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2436                                 NUM_BANKS(ADDR_SURF_8_BANK));
2437                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2439                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2440                                 NUM_BANKS(ADDR_SURF_8_BANK));
2441                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2443                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444                                 NUM_BANKS(ADDR_SURF_8_BANK));
2445                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2447                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2448                                 NUM_BANKS(ADDR_SURF_8_BANK));
2449                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2451                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452                                 NUM_BANKS(ADDR_SURF_8_BANK));
2453                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456                                 NUM_BANKS(ADDR_SURF_8_BANK));
2457                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2459                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2460                                 NUM_BANKS(ADDR_SURF_8_BANK));
2461                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2463                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2464                                 NUM_BANKS(ADDR_SURF_8_BANK));
2465                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2467                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468                                  NUM_BANKS(ADDR_SURF_8_BANK));
2469                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472                                  NUM_BANKS(ADDR_SURF_8_BANK));
2473                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2475                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476                                  NUM_BANKS(ADDR_SURF_8_BANK));
2477                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2479                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480                                  NUM_BANKS(ADDR_SURF_8_BANK));
2481                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484                                  NUM_BANKS(ADDR_SURF_4_BANK));
2485
2486                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2487                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2488
2489                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2490                         if (reg_offset != 7)
2491                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2492
2493                 break;
2494         case CHIP_TONGA:
2495                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2498                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2502                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2503                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2506                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2507                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2508                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2510                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2514                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2515                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2516                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2518                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2519                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2520                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2522                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2524                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2525                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2526                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2527                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2528                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2529                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2532                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2536                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2537                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2538                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2540                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2541                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2543                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2544                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2545                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2546                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2548                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2549                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2552                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2554                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2556                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2558                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2560                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2561                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2562                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2563                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2564                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2565                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2566                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2570                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2574                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2576                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2578                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2580                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2582                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2586                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2587                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2588                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2589                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2590                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2592                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2593                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2594                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2596                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2597                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2598                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2600                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2601                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2602                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2604                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2605                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2608                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2609                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2610                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2612                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2613                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2614                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2615                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2616                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2617
2618                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2620                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2621                                 NUM_BANKS(ADDR_SURF_16_BANK));
2622                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2624                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2625                                 NUM_BANKS(ADDR_SURF_16_BANK));
2626                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2628                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2629                                 NUM_BANKS(ADDR_SURF_16_BANK));
2630                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2632                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2633                                 NUM_BANKS(ADDR_SURF_16_BANK));
2634                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2636                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2637                                 NUM_BANKS(ADDR_SURF_16_BANK));
2638                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2640                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2641                                 NUM_BANKS(ADDR_SURF_16_BANK));
2642                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2644                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2645                                 NUM_BANKS(ADDR_SURF_16_BANK));
2646                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2648                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2649                                 NUM_BANKS(ADDR_SURF_16_BANK));
2650                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2652                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2653                                 NUM_BANKS(ADDR_SURF_16_BANK));
2654                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2656                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2657                                  NUM_BANKS(ADDR_SURF_16_BANK));
2658                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2660                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2661                                  NUM_BANKS(ADDR_SURF_16_BANK));
2662                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2663                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2664                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2665                                  NUM_BANKS(ADDR_SURF_8_BANK));
2666                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2668                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2669                                  NUM_BANKS(ADDR_SURF_4_BANK));
2670                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2672                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2673                                  NUM_BANKS(ADDR_SURF_4_BANK));
2674
2675                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2676                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2677
2678                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2679                         if (reg_offset != 7)
2680                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2681
2682                 break;
2683         case CHIP_POLARIS11:
2684         case CHIP_POLARIS12:
2685                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2688                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2692                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2696                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2697                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2700                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2701                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2704                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2705                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2708                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2709                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2710                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2712                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2713                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2714                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2716                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2717                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2718                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2719                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2722                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2726                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2730                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2731                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2732                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2734                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2735                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2736                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2739                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2744                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2746                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2747                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2748                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2750                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2751                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2754                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2755                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2756                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2760                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2764                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2766                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2768                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2770                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2771                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2772                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2774                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2776                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2778                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2779                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2780                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2782                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2783                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2784                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2786                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2787                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2788                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2790                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2791                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2792                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2794                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2798                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2799                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2800                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2802                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2803                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2806                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2807
2808                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2810                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811                                 NUM_BANKS(ADDR_SURF_16_BANK));
2812
2813                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2815                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2816                                 NUM_BANKS(ADDR_SURF_16_BANK));
2817
2818                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2819                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2820                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2821                                 NUM_BANKS(ADDR_SURF_16_BANK));
2822
2823                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2824                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2825                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2826                                 NUM_BANKS(ADDR_SURF_16_BANK));
2827
2828                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2831                                 NUM_BANKS(ADDR_SURF_16_BANK));
2832
2833                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836                                 NUM_BANKS(ADDR_SURF_16_BANK));
2837
2838                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2840                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2841                                 NUM_BANKS(ADDR_SURF_16_BANK));
2842
2843                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2844                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2845                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846                                 NUM_BANKS(ADDR_SURF_16_BANK));
2847
2848                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2849                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2850                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2851                                 NUM_BANKS(ADDR_SURF_16_BANK));
2852
2853                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856                                 NUM_BANKS(ADDR_SURF_16_BANK));
2857
2858                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2860                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2861                                 NUM_BANKS(ADDR_SURF_16_BANK));
2862
2863                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2864                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2865                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2866                                 NUM_BANKS(ADDR_SURF_16_BANK));
2867
2868                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2870                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2871                                 NUM_BANKS(ADDR_SURF_8_BANK));
2872
2873                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2874                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2875                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2876                                 NUM_BANKS(ADDR_SURF_4_BANK));
2877
2878                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2879                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2880
2881                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2882                         if (reg_offset != 7)
2883                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2884
2885                 break;
2886         case CHIP_POLARIS10:
2887                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2890                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2900                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2906                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2907                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2908                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2912                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2916                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2917                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2920                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2921                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2924                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2926                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2928                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2932                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2933                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2935                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2936                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2937                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2938                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2940                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2941                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2944                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2946                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2948                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2952                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2956                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2957                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2958                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2962                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2966                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2968                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2970                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2974                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2976                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2978                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2979                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2980                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2982                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2984                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2986                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2990                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2992                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2994                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2996                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3000                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3001                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3004                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3005                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3006                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3007                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3009
3010                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3011                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3012                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3013                                 NUM_BANKS(ADDR_SURF_16_BANK));
3014
3015                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3017                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018                                 NUM_BANKS(ADDR_SURF_16_BANK));
3019
3020                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3021                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3022                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3023                                 NUM_BANKS(ADDR_SURF_16_BANK));
3024
3025                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3027                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3028                                 NUM_BANKS(ADDR_SURF_16_BANK));
3029
3030                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3032                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3033                                 NUM_BANKS(ADDR_SURF_16_BANK));
3034
3035                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3037                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3038                                 NUM_BANKS(ADDR_SURF_16_BANK));
3039
3040                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3042                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3043                                 NUM_BANKS(ADDR_SURF_16_BANK));
3044
3045                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3047                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3048                                 NUM_BANKS(ADDR_SURF_16_BANK));
3049
3050                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3052                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3053                                 NUM_BANKS(ADDR_SURF_16_BANK));
3054
3055                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3057                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3058                                 NUM_BANKS(ADDR_SURF_16_BANK));
3059
3060                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3063                                 NUM_BANKS(ADDR_SURF_16_BANK));
3064
3065                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3067                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3068                                 NUM_BANKS(ADDR_SURF_8_BANK));
3069
3070                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3071                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3072                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3073                                 NUM_BANKS(ADDR_SURF_4_BANK));
3074
3075                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3077                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3078                                 NUM_BANKS(ADDR_SURF_4_BANK));
3079
3080                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3081                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3082
3083                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3084                         if (reg_offset != 7)
3085                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3086
3087                 break;
3088         case CHIP_STONEY:
3089                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090                                 PIPE_CONFIG(ADDR_SURF_P2) |
3091                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3092                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3094                                 PIPE_CONFIG(ADDR_SURF_P2) |
3095                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3096                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3097                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3098                                 PIPE_CONFIG(ADDR_SURF_P2) |
3099                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3100                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3102                                 PIPE_CONFIG(ADDR_SURF_P2) |
3103                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3104                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3105                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106                                 PIPE_CONFIG(ADDR_SURF_P2) |
3107                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3108                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3109                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3110                                 PIPE_CONFIG(ADDR_SURF_P2) |
3111                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3112                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3114                                 PIPE_CONFIG(ADDR_SURF_P2) |
3115                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3116                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3118                                 PIPE_CONFIG(ADDR_SURF_P2));
3119                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3120                                 PIPE_CONFIG(ADDR_SURF_P2) |
3121                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3122                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3123                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3124                                  PIPE_CONFIG(ADDR_SURF_P2) |
3125                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3126                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3127                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128                                  PIPE_CONFIG(ADDR_SURF_P2) |
3129                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3130                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3131                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3132                                  PIPE_CONFIG(ADDR_SURF_P2) |
3133                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3134                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3135                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3136                                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3138                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3139                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3140                                  PIPE_CONFIG(ADDR_SURF_P2) |
3141                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3142                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3144                                  PIPE_CONFIG(ADDR_SURF_P2) |
3145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3147                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3148                                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3152                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3156                                  PIPE_CONFIG(ADDR_SURF_P2) |
3157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3160                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3164                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3167                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3168                                  PIPE_CONFIG(ADDR_SURF_P2) |
3169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3172                                  PIPE_CONFIG(ADDR_SURF_P2) |
3173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3176                                  PIPE_CONFIG(ADDR_SURF_P2) |
3177                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3178                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3180                                  PIPE_CONFIG(ADDR_SURF_P2) |
3181                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3182                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3183                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3184                                  PIPE_CONFIG(ADDR_SURF_P2) |
3185                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3186                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3187                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3188                                  PIPE_CONFIG(ADDR_SURF_P2) |
3189                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3190                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3191
3192                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3194                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3195                                 NUM_BANKS(ADDR_SURF_8_BANK));
3196                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3198                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3199                                 NUM_BANKS(ADDR_SURF_8_BANK));
3200                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3202                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3203                                 NUM_BANKS(ADDR_SURF_8_BANK));
3204                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3205                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3206                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3207                                 NUM_BANKS(ADDR_SURF_8_BANK));
3208                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3210                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3211                                 NUM_BANKS(ADDR_SURF_8_BANK));
3212                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3214                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3215                                 NUM_BANKS(ADDR_SURF_8_BANK));
3216                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3218                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3219                                 NUM_BANKS(ADDR_SURF_8_BANK));
3220                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3221                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3222                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3223                                 NUM_BANKS(ADDR_SURF_16_BANK));
3224                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3225                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3226                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3227                                 NUM_BANKS(ADDR_SURF_16_BANK));
3228                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3229                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3230                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3231                                  NUM_BANKS(ADDR_SURF_16_BANK));
3232                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3233                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3234                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235                                  NUM_BANKS(ADDR_SURF_16_BANK));
3236                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3237                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3238                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3239                                  NUM_BANKS(ADDR_SURF_16_BANK));
3240                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3241                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3242                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243                                  NUM_BANKS(ADDR_SURF_16_BANK));
3244                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3246                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3247                                  NUM_BANKS(ADDR_SURF_8_BANK));
3248
3249                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3250                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3251                             reg_offset != 23)
3252                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3253
3254                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3255                         if (reg_offset != 7)
3256                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3257
3258                 break;
3259         default:
3260                 dev_warn(adev->dev,
3261                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3262                          adev->asic_type);
3263                 fallthrough;
3264
3265         case CHIP_CARRIZO:
3266                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3267                                 PIPE_CONFIG(ADDR_SURF_P2) |
3268                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3269                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3270                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3271                                 PIPE_CONFIG(ADDR_SURF_P2) |
3272                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3273                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3274                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3275                                 PIPE_CONFIG(ADDR_SURF_P2) |
3276                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3277                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3278                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3279                                 PIPE_CONFIG(ADDR_SURF_P2) |
3280                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3281                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3282                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3283                                 PIPE_CONFIG(ADDR_SURF_P2) |
3284                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3285                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3286                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3287                                 PIPE_CONFIG(ADDR_SURF_P2) |
3288                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3289                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3290                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3291                                 PIPE_CONFIG(ADDR_SURF_P2) |
3292                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3293                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3294                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3295                                 PIPE_CONFIG(ADDR_SURF_P2));
3296                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3297                                 PIPE_CONFIG(ADDR_SURF_P2) |
3298                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3299                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3300                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3301                                  PIPE_CONFIG(ADDR_SURF_P2) |
3302                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3303                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3304                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3305                                  PIPE_CONFIG(ADDR_SURF_P2) |
3306                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3307                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3308                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3309                                  PIPE_CONFIG(ADDR_SURF_P2) |
3310                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3311                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3312                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3313                                  PIPE_CONFIG(ADDR_SURF_P2) |
3314                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3315                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3316                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3317                                  PIPE_CONFIG(ADDR_SURF_P2) |
3318                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3319                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3320                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3321                                  PIPE_CONFIG(ADDR_SURF_P2) |
3322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3324                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3325                                  PIPE_CONFIG(ADDR_SURF_P2) |
3326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3328                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3329                                  PIPE_CONFIG(ADDR_SURF_P2) |
3330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3332                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3333                                  PIPE_CONFIG(ADDR_SURF_P2) |
3334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3336                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3337                                  PIPE_CONFIG(ADDR_SURF_P2) |
3338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3341                                  PIPE_CONFIG(ADDR_SURF_P2) |
3342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3344                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3345                                  PIPE_CONFIG(ADDR_SURF_P2) |
3346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3348                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3349                                  PIPE_CONFIG(ADDR_SURF_P2) |
3350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3352                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3353                                  PIPE_CONFIG(ADDR_SURF_P2) |
3354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3356                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3357                                  PIPE_CONFIG(ADDR_SURF_P2) |
3358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3360                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3361                                  PIPE_CONFIG(ADDR_SURF_P2) |
3362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3364                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3365                                  PIPE_CONFIG(ADDR_SURF_P2) |
3366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3368
3369                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3370                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3371                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3372                                 NUM_BANKS(ADDR_SURF_8_BANK));
3373                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3374                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3375                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3376                                 NUM_BANKS(ADDR_SURF_8_BANK));
3377                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3378                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3379                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3380                                 NUM_BANKS(ADDR_SURF_8_BANK));
3381                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3382                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3383                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3384                                 NUM_BANKS(ADDR_SURF_8_BANK));
3385                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3386                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3387                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3388                                 NUM_BANKS(ADDR_SURF_8_BANK));
3389                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3391                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3392                                 NUM_BANKS(ADDR_SURF_8_BANK));
3393                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3396                                 NUM_BANKS(ADDR_SURF_8_BANK));
3397                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3398                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3399                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3400                                 NUM_BANKS(ADDR_SURF_16_BANK));
3401                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3402                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3403                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3404                                 NUM_BANKS(ADDR_SURF_16_BANK));
3405                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3406                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3407                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3408                                  NUM_BANKS(ADDR_SURF_16_BANK));
3409                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3410                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3411                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3412                                  NUM_BANKS(ADDR_SURF_16_BANK));
3413                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3414                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3415                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3416                                  NUM_BANKS(ADDR_SURF_16_BANK));
3417                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3418                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3419                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3420                                  NUM_BANKS(ADDR_SURF_16_BANK));
3421                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3422                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3423                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3424                                  NUM_BANKS(ADDR_SURF_8_BANK));
3425
3426                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3427                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3428                             reg_offset != 23)
3429                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3430
3431                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3432                         if (reg_offset != 7)
3433                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3434
3435                 break;
3436         }
3437 }
3438
3439 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3440                                   u32 se_num, u32 sh_num, u32 instance)
3441 {
3442         u32 data;
3443
3444         if (instance == 0xffffffff)
3445                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3446         else
3447                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3448
3449         if (se_num == 0xffffffff)
3450                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3451         else
3452                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3453
3454         if (sh_num == 0xffffffff)
3455                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3456         else
3457                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3458
3459         WREG32(mmGRBM_GFX_INDEX, data);
3460 }
3461
3462 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3463                                   u32 me, u32 pipe, u32 q, u32 vm)
3464 {
3465         vi_srbm_select(adev, me, pipe, q, vm);
3466 }
3467
3468 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3469 {
3470         u32 data, mask;
3471
3472         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3473                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3474
3475         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3476
3477         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3478                                          adev->gfx.config.max_sh_per_se);
3479
3480         return (~data) & mask;
3481 }
3482
3483 static void
3484 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3485 {
3486         switch (adev->asic_type) {
3487         case CHIP_FIJI:
3488         case CHIP_VEGAM:
3489                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3490                           RB_XSEL2(1) | PKR_MAP(2) |
3491                           PKR_XSEL(1) | PKR_YSEL(1) |
3492                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3493                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3494                            SE_PAIR_YSEL(2);
3495                 break;
3496         case CHIP_TONGA:
3497         case CHIP_POLARIS10:
3498                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3499                           SE_XSEL(1) | SE_YSEL(1);
3500                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3501                            SE_PAIR_YSEL(2);
3502                 break;
3503         case CHIP_TOPAZ:
3504         case CHIP_CARRIZO:
3505                 *rconf |= RB_MAP_PKR0(2);
3506                 *rconf1 |= 0x0;
3507                 break;
3508         case CHIP_POLARIS11:
3509         case CHIP_POLARIS12:
3510                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3511                           SE_XSEL(1) | SE_YSEL(1);
3512                 *rconf1 |= 0x0;
3513                 break;
3514         case CHIP_STONEY:
3515                 *rconf |= 0x0;
3516                 *rconf1 |= 0x0;
3517                 break;
3518         default:
3519                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3520                 break;
3521         }
3522 }
3523
3524 static void
3525 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3526                                         u32 raster_config, u32 raster_config_1,
3527                                         unsigned rb_mask, unsigned num_rb)
3528 {
3529         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3530         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3531         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3532         unsigned rb_per_se = num_rb / num_se;
3533         unsigned se_mask[4];
3534         unsigned se;
3535
3536         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3537         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3538         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3539         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3540
3541         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3542         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3543         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3544
3545         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3546                              (!se_mask[2] && !se_mask[3]))) {
3547                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3548
3549                 if (!se_mask[0] && !se_mask[1]) {
3550                         raster_config_1 |=
3551                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3552                 } else {
3553                         raster_config_1 |=
3554                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3555                 }
3556         }
3557
3558         for (se = 0; se < num_se; se++) {
3559                 unsigned raster_config_se = raster_config;
3560                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3561                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3562                 int idx = (se / 2) * 2;
3563
3564                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3565                         raster_config_se &= ~SE_MAP_MASK;
3566
3567                         if (!se_mask[idx]) {
3568                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3569                         } else {
3570                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3571                         }
3572                 }
3573
3574                 pkr0_mask &= rb_mask;
3575                 pkr1_mask &= rb_mask;
3576                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3577                         raster_config_se &= ~PKR_MAP_MASK;
3578
3579                         if (!pkr0_mask) {
3580                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3581                         } else {
3582                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3583                         }
3584                 }
3585
3586                 if (rb_per_se >= 2) {
3587                         unsigned rb0_mask = 1 << (se * rb_per_se);
3588                         unsigned rb1_mask = rb0_mask << 1;
3589
3590                         rb0_mask &= rb_mask;
3591                         rb1_mask &= rb_mask;
3592                         if (!rb0_mask || !rb1_mask) {
3593                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3594
3595                                 if (!rb0_mask) {
3596                                         raster_config_se |=
3597                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3598                                 } else {
3599                                         raster_config_se |=
3600                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3601                                 }
3602                         }
3603
3604                         if (rb_per_se > 2) {
3605                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3606                                 rb1_mask = rb0_mask << 1;
3607                                 rb0_mask &= rb_mask;
3608                                 rb1_mask &= rb_mask;
3609                                 if (!rb0_mask || !rb1_mask) {
3610                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3611
3612                                         if (!rb0_mask) {
3613                                                 raster_config_se |=
3614                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3615                                         } else {
3616                                                 raster_config_se |=
3617                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3618                                         }
3619                                 }
3620                         }
3621                 }
3622
3623                 /* GRBM_GFX_INDEX has a different offset on VI */
3624                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3625                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3626                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3627         }
3628
3629         /* GRBM_GFX_INDEX has a different offset on VI */
3630         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3631 }
3632
3633 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3634 {
3635         int i, j;
3636         u32 data;
3637         u32 raster_config = 0, raster_config_1 = 0;
3638         u32 active_rbs = 0;
3639         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3640                                         adev->gfx.config.max_sh_per_se;
3641         unsigned num_rb_pipes;
3642
3643         mutex_lock(&adev->grbm_idx_mutex);
3644         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3645                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3646                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3647                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3648                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3649                                                rb_bitmap_width_per_sh);
3650                 }
3651         }
3652         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3653
3654         adev->gfx.config.backend_enable_mask = active_rbs;
3655         adev->gfx.config.num_rbs = hweight32(active_rbs);
3656
3657         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3658                              adev->gfx.config.max_shader_engines, 16);
3659
3660         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3661
3662         if (!adev->gfx.config.backend_enable_mask ||
3663                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3664                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3665                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3666         } else {
3667                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3668                                                         adev->gfx.config.backend_enable_mask,
3669                                                         num_rb_pipes);
3670         }
3671
3672         /* cache the values for userspace */
3673         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3674                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3675                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3676                         adev->gfx.config.rb_config[i][j].rb_backend_disable =
3677                                 RREG32(mmCC_RB_BACKEND_DISABLE);
3678                         adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3679                                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3680                         adev->gfx.config.rb_config[i][j].raster_config =
3681                                 RREG32(mmPA_SC_RASTER_CONFIG);
3682                         adev->gfx.config.rb_config[i][j].raster_config_1 =
3683                                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3684                 }
3685         }
3686         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3687         mutex_unlock(&adev->grbm_idx_mutex);
3688 }
3689
3690 #define DEFAULT_SH_MEM_BASES    (0x6000)
3691 /**
3692  * gfx_v8_0_init_compute_vmid - gart enable
3693  *
3694  * @adev: amdgpu_device pointer
3695  *
3696  * Initialize compute vmid sh_mem registers
3697  *
3698  */
3699 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3700 {
3701         int i;
3702         uint32_t sh_mem_config;
3703         uint32_t sh_mem_bases;
3704
3705         /*
3706          * Configure apertures:
3707          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3708          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3709          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3710          */
3711         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3712
3713         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3714                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3715                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3716                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3717                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3718                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3719
3720         mutex_lock(&adev->srbm_mutex);
3721         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3722                 vi_srbm_select(adev, 0, 0, 0, i);
3723                 /* CP and shaders */
3724                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3725                 WREG32(mmSH_MEM_APE1_BASE, 1);
3726                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3727                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3728         }
3729         vi_srbm_select(adev, 0, 0, 0, 0);
3730         mutex_unlock(&adev->srbm_mutex);
3731
3732         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3733            acccess. These should be enabled by FW for target VMIDs. */
3734         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3735                 WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3736                 WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3737                 WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3738                 WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3739         }
3740 }
3741
3742 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3743 {
3744         int vmid;
3745
3746         /*
3747          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3748          * access. Compute VMIDs should be enabled by FW for target VMIDs,
3749          * the driver can enable them for graphics. VMID0 should maintain
3750          * access so that HWS firmware can save/restore entries.
3751          */
3752         for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3753                 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3754                 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3755                 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3756                 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3757         }
3758 }
3759
3760 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3761 {
3762         switch (adev->asic_type) {
3763         default:
3764                 adev->gfx.config.double_offchip_lds_buf = 1;
3765                 break;
3766         case CHIP_CARRIZO:
3767         case CHIP_STONEY:
3768                 adev->gfx.config.double_offchip_lds_buf = 0;
3769                 break;
3770         }
3771 }
3772
3773 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3774 {
3775         u32 tmp, sh_static_mem_cfg;
3776         int i;
3777
3778         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3779         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3780         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3781         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3782
3783         gfx_v8_0_tiling_mode_table_init(adev);
3784         gfx_v8_0_setup_rb(adev);
3785         gfx_v8_0_get_cu_info(adev);
3786         gfx_v8_0_config_init(adev);
3787
3788         /* XXX SH_MEM regs */
3789         /* where to put LDS, scratch, GPUVM in FSA64 space */
3790         sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3791                                    SWIZZLE_ENABLE, 1);
3792         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3793                                    ELEMENT_SIZE, 1);
3794         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3795                                    INDEX_STRIDE, 3);
3796         WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3797
3798         mutex_lock(&adev->srbm_mutex);
3799         for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3800                 vi_srbm_select(adev, 0, 0, 0, i);
3801                 /* CP and shaders */
3802                 if (i == 0) {
3803                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3804                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3805                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3806                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3807                         WREG32(mmSH_MEM_CONFIG, tmp);
3808                         WREG32(mmSH_MEM_BASES, 0);
3809                 } else {
3810                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3811                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3812                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3813                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3814                         WREG32(mmSH_MEM_CONFIG, tmp);
3815                         tmp = adev->gmc.shared_aperture_start >> 48;
3816                         WREG32(mmSH_MEM_BASES, tmp);
3817                 }
3818
3819                 WREG32(mmSH_MEM_APE1_BASE, 1);
3820                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3821         }
3822         vi_srbm_select(adev, 0, 0, 0, 0);
3823         mutex_unlock(&adev->srbm_mutex);
3824
3825         gfx_v8_0_init_compute_vmid(adev);
3826         gfx_v8_0_init_gds_vmid(adev);
3827
3828         mutex_lock(&adev->grbm_idx_mutex);
3829         /*
3830          * making sure that the following register writes will be broadcasted
3831          * to all the shaders
3832          */
3833         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3834
3835         WREG32(mmPA_SC_FIFO_SIZE,
3836                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3837                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3838                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3839                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3840                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3841                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3842                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3843                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3844
3845         tmp = RREG32(mmSPI_ARB_PRIORITY);
3846         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3847         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3848         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3849         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3850         WREG32(mmSPI_ARB_PRIORITY, tmp);
3851
3852         mutex_unlock(&adev->grbm_idx_mutex);
3853
3854 }
3855
3856 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3857 {
3858         u32 i, j, k;
3859         u32 mask;
3860
3861         mutex_lock(&adev->grbm_idx_mutex);
3862         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3863                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3864                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3865                         for (k = 0; k < adev->usec_timeout; k++) {
3866                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3867                                         break;
3868                                 udelay(1);
3869                         }
3870                         if (k == adev->usec_timeout) {
3871                                 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3872                                                       0xffffffff, 0xffffffff);
3873                                 mutex_unlock(&adev->grbm_idx_mutex);
3874                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3875                                          i, j);
3876                                 return;
3877                         }
3878                 }
3879         }
3880         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3881         mutex_unlock(&adev->grbm_idx_mutex);
3882
3883         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3884                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3885                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3886                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3887         for (k = 0; k < adev->usec_timeout; k++) {
3888                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3889                         break;
3890                 udelay(1);
3891         }
3892 }
3893
3894 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3895                                                bool enable)
3896 {
3897         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3898
3899         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3900         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3901         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3902         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3903
3904         WREG32(mmCP_INT_CNTL_RING0, tmp);
3905 }
3906
3907 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3908 {
3909         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3910         /* csib */
3911         WREG32(mmRLC_CSIB_ADDR_HI,
3912                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3913         WREG32(mmRLC_CSIB_ADDR_LO,
3914                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3915         WREG32(mmRLC_CSIB_LENGTH,
3916                         adev->gfx.rlc.clear_state_size);
3917 }
3918
/*
 * Walk the indirect part of the register list format in place.
 *
 * Starting at @ind_offset, the list is treated as groups of three-dword
 * records, each group ended by a 0xFFFFFFFF dword.  For every group the
 * offset of its first dword is appended to @ind_start_offsets.  The third
 * dword of every record is looked up in (or appended to) @unique_indices
 * and then overwritten with its position in that table.
 *
 * @register_list_format: list to parse; modified in place
 * @ind_offset: dword offset at which parsing starts
 * @list_size: total number of dwords in @register_list_format
 * @unique_indices/@indices_count/@max_indices: table of distinct index
 *	values, its current fill level and its capacity
 * @ind_start_offsets/@offset_count/@max_offset: table of group start
 *	offsets, its current fill level and its capacity
 *
 * Filling either table up to its capacity triggers BUG_ON().  A record that
 * would extend past @list_size triggers WARN_ON() and stops the parse rather
 * than touching memory beyond the list.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/*
		 * The index dword sits two dwords past the start of the
		 * record; refuse to step outside the list on a truncated
		 * record.
		 */
		if (WARN_ON(ind_offset + 2 >= list_size))
			break;

		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw value with its slot in unique_indices */
		register_list_format[ind_offset] = indices;
	}
}
3968
3969 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3970 {
3971         int i, temp, data;
3972         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3973         int indices_count = 0;
3974         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3975         int offset_count = 0;
3976
3977         int list_size;
3978         unsigned int *register_list_format =
3979                 kmemdup(adev->gfx.rlc.register_list_format,
3980                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3981         if (!register_list_format)
3982                 return -ENOMEM;
3983
3984         gfx_v8_0_parse_ind_reg_list(register_list_format,
3985                                 RLC_FormatDirectRegListLength,
3986                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3987                                 unique_indices,
3988                                 &indices_count,
3989                                 ARRAY_SIZE(unique_indices),
3990                                 indirect_start_offsets,
3991                                 &offset_count,
3992                                 ARRAY_SIZE(indirect_start_offsets));
3993
3994         /* save and restore list */
3995         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3996
3997         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3998         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3999                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4000
4001         /* indirect list */
4002         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4003         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4004                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4005
4006         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4007         list_size = list_size >> 1;
4008         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4009         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4010
4011         /* starting offsets starts */
4012         WREG32(mmRLC_GPM_SCRATCH_ADDR,
4013                 adev->gfx.rlc.starting_offsets_start);
4014         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4015                 WREG32(mmRLC_GPM_SCRATCH_DATA,
4016                                 indirect_start_offsets[i]);
4017
4018         /* unique indices */
4019         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4020         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4021         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4022                 if (unique_indices[i] != 0) {
4023                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4024                         WREG32(data + i, unique_indices[i] >> 20);
4025                 }
4026         }
4027         kfree(register_list_format);
4028
4029         return 0;
4030 }
4031
4032 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4033 {
4034         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4035 }
4036
4037 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4038 {
4039         uint32_t data;
4040
4041         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4042
4043         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4044         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4045         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4046         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4047         WREG32(mmRLC_PG_DELAY, data);
4048
4049         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4050         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4051
4052 }
4053
4054 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4055                                                 bool enable)
4056 {
4057         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4058 }
4059
4060 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4061                                                   bool enable)
4062 {
4063         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4064 }
4065
4066 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4067 {
4068         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4069 }
4070
4071 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4072 {
4073         if ((adev->asic_type == CHIP_CARRIZO) ||
4074             (adev->asic_type == CHIP_STONEY)) {
4075                 gfx_v8_0_init_csb(adev);
4076                 gfx_v8_0_init_save_restore_list(adev);
4077                 gfx_v8_0_enable_save_restore_machine(adev);
4078                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4079                 gfx_v8_0_init_power_gating(adev);
4080                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4081         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4082                    (adev->asic_type == CHIP_POLARIS12) ||
4083                    (adev->asic_type == CHIP_VEGAM)) {
4084                 gfx_v8_0_init_csb(adev);
4085                 gfx_v8_0_init_save_restore_list(adev);
4086                 gfx_v8_0_enable_save_restore_machine(adev);
4087                 gfx_v8_0_init_power_gating(adev);
4088         }
4089
4090 }
4091
4092 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4093 {
4094         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4095
4096         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4097         gfx_v8_0_wait_for_rlc_serdes(adev);
4098 }
4099
4100 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4101 {
4102         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4103         udelay(50);
4104
4105         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4106         udelay(50);
4107 }
4108
4109 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4110 {
4111         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4112
4113         /* carrizo do enable cp interrupt after cp inited */
4114         if (!(adev->flags & AMD_IS_APU))
4115                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4116
4117         udelay(50);
4118 }
4119
4120 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4121 {
4122         if (amdgpu_sriov_vf(adev)) {
4123                 gfx_v8_0_init_csb(adev);
4124                 return 0;
4125         }
4126
4127         adev->gfx.rlc.funcs->stop(adev);
4128         adev->gfx.rlc.funcs->reset(adev);
4129         gfx_v8_0_init_pg(adev);
4130         adev->gfx.rlc.funcs->start(adev);
4131
4132         return 0;
4133 }
4134
4135 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4136 {
4137         u32 tmp = RREG32(mmCP_ME_CNTL);
4138
4139         if (enable) {
4140                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4141                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4142                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4143         } else {
4144                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4145                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4146                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4147         }
4148         WREG32(mmCP_ME_CNTL, tmp);
4149         udelay(50);
4150 }
4151
4152 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4153 {
4154         u32 count = 0;
4155         const struct cs_section_def *sect = NULL;
4156         const struct cs_extent_def *ext = NULL;
4157
4158         /* begin clear state */
4159         count += 2;
4160         /* context control state */
4161         count += 3;
4162
4163         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4164                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4165                         if (sect->id == SECT_CONTEXT)
4166                                 count += 2 + ext->reg_count;
4167                         else
4168                                 return 0;
4169                 }
4170         }
4171         /* pa_sc_raster_config/pa_sc_raster_config1 */
4172         count += 4;
4173         /* end clear state */
4174         count += 2;
4175         /* clear state */
4176         count += 2;
4177
4178         return count;
4179 }
4180
4181 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4182 {
4183         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4184         const struct cs_section_def *sect = NULL;
4185         const struct cs_extent_def *ext = NULL;
4186         int r, i;
4187
4188         /* init the CP */
4189         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4190         WREG32(mmCP_ENDIAN_SWAP, 0);
4191         WREG32(mmCP_DEVICE_ID, 1);
4192
4193         gfx_v8_0_cp_gfx_enable(adev, true);
4194
4195         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4196         if (r) {
4197                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4198                 return r;
4199         }
4200
4201         /* clear state buffer */
4202         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4203         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4204
4205         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4206         amdgpu_ring_write(ring, 0x80000000);
4207         amdgpu_ring_write(ring, 0x80000000);
4208
4209         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4210                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4211                         if (sect->id == SECT_CONTEXT) {
4212                                 amdgpu_ring_write(ring,
4213                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4214                                                ext->reg_count));
4215                                 amdgpu_ring_write(ring,
4216                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4217                                 for (i = 0; i < ext->reg_count; i++)
4218                                         amdgpu_ring_write(ring, ext->extent[i]);
4219                         }
4220                 }
4221         }
4222
4223         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4224         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4225         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4226         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4227
4228         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4229         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4230
4231         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4232         amdgpu_ring_write(ring, 0);
4233
4234         /* init the CE partitions */
4235         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4236         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4237         amdgpu_ring_write(ring, 0x8000);
4238         amdgpu_ring_write(ring, 0x8000);
4239
4240         amdgpu_ring_commit(ring);
4241
4242         return 0;
4243 }
4244 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4245 {
4246         u32 tmp;
4247         /* no gfx doorbells on iceland */
4248         if (adev->asic_type == CHIP_TOPAZ)
4249                 return;
4250
4251         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4252
4253         if (ring->use_doorbell) {
4254                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4255                                 DOORBELL_OFFSET, ring->doorbell_index);
4256                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4257                                                 DOORBELL_HIT, 0);
4258                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4259                                             DOORBELL_EN, 1);
4260         } else {
4261                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4262         }
4263
4264         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4265
4266         if (adev->flags & AMD_IS_APU)
4267                 return;
4268
4269         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4270                                         DOORBELL_RANGE_LOWER,
4271                                         adev->doorbell_index.gfx_ring0);
4272         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4273
4274         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4275                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4276 }
4277
4278 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4279 {
4280         struct amdgpu_ring *ring;
4281         u32 tmp;
4282         u32 rb_bufsz;
4283         u64 rb_addr, rptr_addr, wptr_gpu_addr;
4284
4285         /* Set the write pointer delay */
4286         WREG32(mmCP_RB_WPTR_DELAY, 0);
4287
4288         /* set the RB to use vmid 0 */
4289         WREG32(mmCP_RB_VMID, 0);
4290
4291         /* Set ring buffer size */
4292         ring = &adev->gfx.gfx_ring[0];
4293         rb_bufsz = order_base_2(ring->ring_size / 8);
4294         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4295         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4296         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4297         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4298 #ifdef __BIG_ENDIAN
4299         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4300 #endif
4301         WREG32(mmCP_RB0_CNTL, tmp);
4302
4303         /* Initialize the ring buffer's read and write pointers */
4304         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4305         ring->wptr = 0;
4306         WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4307
4308         /* set the wb address wether it's enabled or not */
4309         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4310         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4311         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4312
4313         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4314         WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4315         WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4316         mdelay(1);
4317         WREG32(mmCP_RB0_CNTL, tmp);
4318
4319         rb_addr = ring->gpu_addr >> 8;
4320         WREG32(mmCP_RB0_BASE, rb_addr);
4321         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4322
4323         gfx_v8_0_set_cpg_door_bell(adev, ring);
4324         /* start the ring */
4325         amdgpu_ring_clear_ring(ring);
4326         gfx_v8_0_cp_gfx_start(adev);
4327         ring->sched.ready = true;
4328
4329         return 0;
4330 }
4331
4332 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4333 {
4334         if (enable) {
4335                 WREG32(mmCP_MEC_CNTL, 0);
4336         } else {
4337                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4338                 adev->gfx.kiq.ring.sched.ready = false;
4339         }
4340         udelay(50);
4341 }
4342
4343 /* KIQ functions */
4344 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4345 {
4346         uint32_t tmp;
4347         struct amdgpu_device *adev = ring->adev;
4348
4349         /* tell RLC which is KIQ queue */
4350         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4351         tmp &= 0xffffff00;
4352         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4353         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4354         tmp |= 0x80;
4355         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4356 }
4357
4358 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4359 {
4360         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4361         uint64_t queue_mask = 0;
4362         int r, i;
4363
4364         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4365                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4366                         continue;
4367
4368                 /* This situation may be hit in the future if a new HW
4369                  * generation exposes more than 64 queues. If so, the
4370                  * definition of queue_mask needs updating */
4371                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4372                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4373                         break;
4374                 }
4375
4376                 queue_mask |= (1ull << i);
4377         }
4378
4379         r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4380         if (r) {
4381                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4382                 return r;
4383         }
4384         /* set resources */
4385         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4386         amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4387         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4388         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4389         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4390         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4391         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4392         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4393         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4394                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4395                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4396                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4397
4398                 /* map queues */
4399                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4400                 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4401                 amdgpu_ring_write(kiq_ring,
4402                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4403                 amdgpu_ring_write(kiq_ring,
4404                                   PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4405                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4406                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4407                                   PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4408                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4409                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4410                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4411                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4412         }
4413
4414         amdgpu_ring_commit(kiq_ring);
4415
4416         return 0;
4417 }
4418
4419 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4420 {
4421         int i, r = 0;
4422
4423         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4424                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4425                 for (i = 0; i < adev->usec_timeout; i++) {
4426                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4427                                 break;
4428                         udelay(1);
4429                 }
4430                 if (i == adev->usec_timeout)
4431                         r = -ETIMEDOUT;
4432         }
4433         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4434         WREG32(mmCP_HQD_PQ_RPTR, 0);
4435         WREG32(mmCP_HQD_PQ_WPTR, 0);
4436
4437         return r;
4438 }
4439
4440 static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4441 {
4442         struct amdgpu_device *adev = ring->adev;
4443
4444         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4445                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
4446                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4447                         mqd->cp_hqd_queue_priority =
4448                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4449                 }
4450         }
4451 }
4452
4453 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4454 {
4455         struct amdgpu_device *adev = ring->adev;
4456         struct vi_mqd *mqd = ring->mqd_ptr;
4457         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4458         uint32_t tmp;
4459
4460         mqd->header = 0xC0310800;
4461         mqd->compute_pipelinestat_enable = 0x00000001;
4462         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4463         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4464         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4465         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4466         mqd->compute_misc_reserved = 0x00000003;
4467         mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4468                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4469         mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4470                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4471         eop_base_addr = ring->eop_gpu_addr >> 8;
4472         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4473         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4474
4475         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4476         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4477         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4478                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4479
4480         mqd->cp_hqd_eop_control = tmp;
4481
4482         /* enable doorbell? */
4483         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4484                             CP_HQD_PQ_DOORBELL_CONTROL,
4485                             DOORBELL_EN,
4486                             ring->use_doorbell ? 1 : 0);
4487
4488         mqd->cp_hqd_pq_doorbell_control = tmp;
4489
4490         /* set the pointer to the MQD */
4491         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4492         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4493
4494         /* set MQD vmid to 0 */
4495         tmp = RREG32(mmCP_MQD_CONTROL);
4496         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4497         mqd->cp_mqd_control = tmp;
4498
4499         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4500         hqd_gpu_addr = ring->gpu_addr >> 8;
4501         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4502         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4503
4504         /* set up the HQD, this is similar to CP_RB0_CNTL */
4505         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4506         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4507                             (order_base_2(ring->ring_size / 4) - 1));
4508         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4509                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4510 #ifdef __BIG_ENDIAN
4511         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4512 #endif
4513         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4514         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4515         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4516         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4517         mqd->cp_hqd_pq_control = tmp;
4518
4519         /* set the wb address whether it's enabled or not */
4520         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4521         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4522         mqd->cp_hqd_pq_rptr_report_addr_hi =
4523                 upper_32_bits(wb_gpu_addr) & 0xffff;
4524
4525         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4526         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4527         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4528         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4529
4530         tmp = 0;
4531         /* enable the doorbell if requested */
4532         if (ring->use_doorbell) {
4533                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4534                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4535                                 DOORBELL_OFFSET, ring->doorbell_index);
4536
4537                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4538                                          DOORBELL_EN, 1);
4539                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4540                                          DOORBELL_SOURCE, 0);
4541                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4542                                          DOORBELL_HIT, 0);
4543         }
4544
4545         mqd->cp_hqd_pq_doorbell_control = tmp;
4546
4547         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4548         ring->wptr = 0;
4549         mqd->cp_hqd_pq_wptr = ring->wptr;
4550         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4551
4552         /* set the vmid for the queue */
4553         mqd->cp_hqd_vmid = 0;
4554
4555         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4556         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4557         mqd->cp_hqd_persistent_state = tmp;
4558
4559         /* set MTYPE */
4560         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4561         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4562         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4563         mqd->cp_hqd_ib_control = tmp;
4564
4565         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4566         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4567         mqd->cp_hqd_iq_timer = tmp;
4568
4569         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4570         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4571         mqd->cp_hqd_ctx_save_control = tmp;
4572
4573         /* defaults */
4574         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4575         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4576         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4577         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4578         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4579         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4580         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4581         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4582         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4583         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4584         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4585         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4586
4587         /* set static priority for a queue/ring */
4588         gfx_v8_0_mqd_set_priority(ring, mqd);
4589         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4590
4591         /* map_queues packet doesn't need activate the queue,
4592          * so only kiq need set this field.
4593          */
4594         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4595                 mqd->cp_hqd_active = 1;
4596
4597         return 0;
4598 }
4599
4600 static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4601                         struct vi_mqd *mqd)
4602 {
4603         uint32_t mqd_reg;
4604         uint32_t *mqd_data;
4605
4606         /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4607         mqd_data = &mqd->cp_mqd_base_addr_lo;
4608
4609         /* disable wptr polling */
4610         WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4611
4612         /* program all HQD registers */
4613         for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4614                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4615
4616         /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4617          * This is safe since EOP RPTR==WPTR for any inactive HQD
4618          * on ASICs that do not support context-save.
4619          * EOP writes/reads can start anywhere in the ring.
4620          */
4621         if (adev->asic_type != CHIP_TONGA) {
4622                 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4623                 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4624                 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4625         }
4626
4627         for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4628                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4629
4630         /* activate the HQD */
4631         for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4632                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4633
4634         return 0;
4635 }
4636
4637 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4638 {
4639         struct amdgpu_device *adev = ring->adev;
4640         struct vi_mqd *mqd = ring->mqd_ptr;
4641         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4642
4643         gfx_v8_0_kiq_setting(ring);
4644
4645         if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4646                 /* reset MQD to a clean status */
4647                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4648                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4649
4650                 /* reset ring buffer */
4651                 ring->wptr = 0;
4652                 amdgpu_ring_clear_ring(ring);
4653                 mutex_lock(&adev->srbm_mutex);
4654                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4655                 gfx_v8_0_mqd_commit(adev, mqd);
4656                 vi_srbm_select(adev, 0, 0, 0, 0);
4657                 mutex_unlock(&adev->srbm_mutex);
4658         } else {
4659                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4660                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4661                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4662                 mutex_lock(&adev->srbm_mutex);
4663                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4664                 gfx_v8_0_mqd_init(ring);
4665                 gfx_v8_0_mqd_commit(adev, mqd);
4666                 vi_srbm_select(adev, 0, 0, 0, 0);
4667                 mutex_unlock(&adev->srbm_mutex);
4668
4669                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4670                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4671         }
4672
4673         return 0;
4674 }
4675
4676 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4677 {
4678         struct amdgpu_device *adev = ring->adev;
4679         struct vi_mqd *mqd = ring->mqd_ptr;
4680         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4681
4682         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4683                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4684                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4685                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4686                 mutex_lock(&adev->srbm_mutex);
4687                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4688                 gfx_v8_0_mqd_init(ring);
4689                 vi_srbm_select(adev, 0, 0, 0, 0);
4690                 mutex_unlock(&adev->srbm_mutex);
4691
4692                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4693                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4694         } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4695                 /* reset MQD to a clean status */
4696                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4697                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4698                 /* reset ring buffer */
4699                 ring->wptr = 0;
4700                 amdgpu_ring_clear_ring(ring);
4701         } else {
4702                 amdgpu_ring_clear_ring(ring);
4703         }
4704         return 0;
4705 }
4706
4707 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4708 {
4709         if (adev->asic_type > CHIP_TONGA) {
4710                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4711                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4712         }
4713         /* enable doorbells */
4714         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4715 }
4716
4717 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4718 {
4719         struct amdgpu_ring *ring;
4720         int r;
4721
4722         ring = &adev->gfx.kiq.ring;
4723
4724         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4725         if (unlikely(r != 0))
4726                 return r;
4727
4728         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4729         if (unlikely(r != 0))
4730                 return r;
4731
4732         gfx_v8_0_kiq_init_queue(ring);
4733         amdgpu_bo_kunmap(ring->mqd_obj);
4734         ring->mqd_ptr = NULL;
4735         amdgpu_bo_unreserve(ring->mqd_obj);
4736         ring->sched.ready = true;
4737         return 0;
4738 }
4739
4740 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4741 {
4742         struct amdgpu_ring *ring = NULL;
4743         int r = 0, i;
4744
4745         gfx_v8_0_cp_compute_enable(adev, true);
4746
4747         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4748                 ring = &adev->gfx.compute_ring[i];
4749
4750                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4751                 if (unlikely(r != 0))
4752                         goto done;
4753                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4754                 if (!r) {
4755                         r = gfx_v8_0_kcq_init_queue(ring);
4756                         amdgpu_bo_kunmap(ring->mqd_obj);
4757                         ring->mqd_ptr = NULL;
4758                 }
4759                 amdgpu_bo_unreserve(ring->mqd_obj);
4760                 if (r)
4761                         goto done;
4762         }
4763
4764         gfx_v8_0_set_mec_doorbell_range(adev);
4765
4766         r = gfx_v8_0_kiq_kcq_enable(adev);
4767         if (r)
4768                 goto done;
4769
4770 done:
4771         return r;
4772 }
4773
4774 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4775 {
4776         int r, i;
4777         struct amdgpu_ring *ring;
4778
4779         /* collect all the ring_tests here, gfx, kiq, compute */
4780         ring = &adev->gfx.gfx_ring[0];
4781         r = amdgpu_ring_test_helper(ring);
4782         if (r)
4783                 return r;
4784
4785         ring = &adev->gfx.kiq.ring;
4786         r = amdgpu_ring_test_helper(ring);
4787         if (r)
4788                 return r;
4789
4790         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4791                 ring = &adev->gfx.compute_ring[i];
4792                 amdgpu_ring_test_helper(ring);
4793         }
4794
4795         return 0;
4796 }
4797
4798 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4799 {
4800         int r;
4801
4802         if (!(adev->flags & AMD_IS_APU))
4803                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4804
4805         r = gfx_v8_0_kiq_resume(adev);
4806         if (r)
4807                 return r;
4808
4809         r = gfx_v8_0_cp_gfx_resume(adev);
4810         if (r)
4811                 return r;
4812
4813         r = gfx_v8_0_kcq_resume(adev);
4814         if (r)
4815                 return r;
4816
4817         r = gfx_v8_0_cp_test_all_rings(adev);
4818         if (r)
4819                 return r;
4820
4821         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4822
4823         return 0;
4824 }
4825
/*
 * Apply the same @enable state to the gfx CP first and then to the
 * compute CP.
 */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4831
4832 static int gfx_v8_0_hw_init(void *handle)
4833 {
4834         int r;
4835         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4836
4837         gfx_v8_0_init_golden_registers(adev);
4838         gfx_v8_0_constants_init(adev);
4839
4840         r = adev->gfx.rlc.funcs->resume(adev);
4841         if (r)
4842                 return r;
4843
4844         r = gfx_v8_0_cp_resume(adev);
4845
4846         return r;
4847 }
4848
4849 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4850 {
4851         int r, i;
4852         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4853
4854         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4855         if (r)
4856                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4857
4858         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4859                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4860
4861                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4862                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4863                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4864                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4865                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4866                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4867                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4868                 amdgpu_ring_write(kiq_ring, 0);
4869                 amdgpu_ring_write(kiq_ring, 0);
4870                 amdgpu_ring_write(kiq_ring, 0);
4871         }
4872         r = amdgpu_ring_test_helper(kiq_ring);
4873         if (r)
4874                 DRM_ERROR("KCQ disable failed\n");
4875
4876         return r;
4877 }
4878
4879 static bool gfx_v8_0_is_idle(void *handle)
4880 {
4881         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4882
4883         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4884                 || RREG32(mmGRBM_STATUS2) != 0x8)
4885                 return false;
4886         else
4887                 return true;
4888 }
4889
4890 static bool gfx_v8_0_rlc_is_idle(void *handle)
4891 {
4892         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4893
4894         if (RREG32(mmGRBM_STATUS2) != 0x8)
4895                 return false;
4896         else
4897                 return true;
4898 }
4899
4900 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4901 {
4902         unsigned int i;
4903         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4904
4905         for (i = 0; i < adev->usec_timeout; i++) {
4906                 if (gfx_v8_0_rlc_is_idle(handle))
4907                         return 0;
4908
4909                 udelay(1);
4910         }
4911         return -ETIMEDOUT;
4912 }
4913
4914 static int gfx_v8_0_wait_for_idle(void *handle)
4915 {
4916         unsigned int i;
4917         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4918
4919         for (i = 0; i < adev->usec_timeout; i++) {
4920                 if (gfx_v8_0_is_idle(handle))
4921                         return 0;
4922
4923                 udelay(1);
4924         }
4925         return -ETIMEDOUT;
4926 }
4927
4928 static int gfx_v8_0_hw_fini(void *handle)
4929 {
4930         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4931
4932         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4933         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4934
4935         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4936
4937         amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4938
4939         /* disable KCQ to avoid CPC touch memory not valid anymore */
4940         gfx_v8_0_kcq_disable(adev);
4941
4942         if (amdgpu_sriov_vf(adev)) {
4943                 pr_debug("For SRIOV client, shouldn't do anything.\n");
4944                 return 0;
4945         }
4946         amdgpu_gfx_rlc_enter_safe_mode(adev);
4947         if (!gfx_v8_0_wait_for_idle(adev))
4948                 gfx_v8_0_cp_enable(adev, false);
4949         else
4950                 pr_err("cp is busy, skip halt cp\n");
4951         if (!gfx_v8_0_wait_for_rlc_idle(adev))
4952                 adev->gfx.rlc.funcs->stop(adev);
4953         else
4954                 pr_err("rlc is busy, skip halt rlc\n");
4955         amdgpu_gfx_rlc_exit_safe_mode(adev);
4956
4957         return 0;
4958 }
4959
/* Suspend hook: forwards @handle to gfx_v8_0_hw_fini() and returns its result. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4964
/* Resume hook: forwards @handle to gfx_v8_0_hw_init() and returns its result. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4969
4970 static bool gfx_v8_0_check_soft_reset(void *handle)
4971 {
4972         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4973         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4974         u32 tmp;
4975
4976         /* GRBM_STATUS */
4977         tmp = RREG32(mmGRBM_STATUS);
4978         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4979                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4980                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4981                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4982                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4983                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4984                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4985                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4986                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4987                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4988                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4989                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4990                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4991         }
4992
4993         /* GRBM_STATUS2 */
4994         tmp = RREG32(mmGRBM_STATUS2);
4995         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4996                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4997                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4998
4999         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5000             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5001             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5002                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5003                                                 SOFT_RESET_CPF, 1);
5004                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5005                                                 SOFT_RESET_CPC, 1);
5006                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5007                                                 SOFT_RESET_CPG, 1);
5008                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5009                                                 SOFT_RESET_GRBM, 1);
5010         }
5011
5012         /* SRBM_STATUS */
5013         tmp = RREG32(mmSRBM_STATUS);
5014         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5015                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5016                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5017         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5018                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5019                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5020
5021         if (grbm_soft_reset || srbm_soft_reset) {
5022                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5023                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5024                 return true;
5025         } else {
5026                 adev->gfx.grbm_soft_reset = 0;
5027                 adev->gfx.srbm_soft_reset = 0;
5028                 return false;
5029         }
5030 }
5031
5032 static int gfx_v8_0_pre_soft_reset(void *handle)
5033 {
5034         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5035         u32 grbm_soft_reset = 0;
5036
5037         if ((!adev->gfx.grbm_soft_reset) &&
5038             (!adev->gfx.srbm_soft_reset))
5039                 return 0;
5040
5041         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5042
5043         /* stop the rlc */
5044         adev->gfx.rlc.funcs->stop(adev);
5045
5046         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5047             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5048                 /* Disable GFX parsing/prefetching */
5049                 gfx_v8_0_cp_gfx_enable(adev, false);
5050
5051         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5052             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5053             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5054             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5055                 int i;
5056
5057                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5058                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5059
5060                         mutex_lock(&adev->srbm_mutex);
5061                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5062                         gfx_v8_0_deactivate_hqd(adev, 2);
5063                         vi_srbm_select(adev, 0, 0, 0, 0);
5064                         mutex_unlock(&adev->srbm_mutex);
5065                 }
5066                 /* Disable MEC parsing/prefetching */
5067                 gfx_v8_0_cp_compute_enable(adev, false);
5068         }
5069
5070         return 0;
5071 }
5072
/*
 * Perform the soft reset recorded in adev->gfx.grbm_soft_reset and
 * adev->gfx.srbm_soft_reset.
 *
 * Returns 0 immediately when both recorded values are zero.  Otherwise
 * GFX_STALL and GFX_CLEAR are set in GMCON_DEBUG, the requested bits are
 * set and then cleared again in GRBM_SOFT_RESET and SRBM_SOFT_RESET (each
 * with a 50us delay in between), and finally GFX_STALL/GFX_CLEAR are
 * cleared.
 *
 * Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall and clear GFX in GMCON_DEBUG before touching the reset bits */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* assert the requested GRBM reset bits */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		/* read back after the write - presumably to post it; TODO confirm */
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		/* de-assert the same bits again */
		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert / delay / de-assert sequence for SRBM */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GFX stall/clear set at the top */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5134
/*
 * Bring the block back up after a soft reset.
 *
 * Does nothing unless a GRBM or SRBM reset value is recorded in
 * adev->gfx.  When any of the CP/CPF/CPC/CPG resets was requested, every
 * compute ring's HQD is deactivated and the KIQ and KCQs are resumed;
 * when a CP or GFX reset was requested the gfx CP is resumed.  Afterwards
 * all rings are tested and the RLC is started.
 *
 * The return values of the resume and ring-test calls are not checked;
 * this function always returns 0.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			/* SRBM selection is done under srbm_mutex and reset to 0,0,0,0 afterwards */
			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
		gfx_v8_0_kcq_resume(adev);
	}

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	gfx_v8_0_cp_test_all_rings(adev);

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
5175
5176 /**
5177  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5178  *
5179  * @adev: amdgpu_device pointer
5180  *
5181  * Fetches a GPU clock counter snapshot.
5182  * Returns the 64 bit clock counter snapshot.
5183  */
5184 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5185 {
5186         uint64_t clock;
5187
5188         mutex_lock(&adev->gfx.gpu_clock_mutex);
5189         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5190         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5191                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5192         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5193         return clock;
5194 }
5195
5196 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5197                                           uint32_t vmid,
5198                                           uint32_t gds_base, uint32_t gds_size,
5199                                           uint32_t gws_base, uint32_t gws_size,
5200                                           uint32_t oa_base, uint32_t oa_size)
5201 {
5202         /* GDS Base */
5203         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5204         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5205                                 WRITE_DATA_DST_SEL(0)));
5206         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5207         amdgpu_ring_write(ring, 0);
5208         amdgpu_ring_write(ring, gds_base);
5209
5210         /* GDS Size */
5211         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5212         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5213                                 WRITE_DATA_DST_SEL(0)));
5214         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5215         amdgpu_ring_write(ring, 0);
5216         amdgpu_ring_write(ring, gds_size);
5217
5218         /* GWS */
5219         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5220         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5221                                 WRITE_DATA_DST_SEL(0)));
5222         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5223         amdgpu_ring_write(ring, 0);
5224         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5225
5226         /* OA */
5227         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5228         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5229                                 WRITE_DATA_DST_SEL(0)));
5230         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5231         amdgpu_ring_write(ring, 0);
5232         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5233 }
5234
5235 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5236 {
5237         WREG32(mmSQ_IND_INDEX,
5238                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5239                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5240                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5241                 (SQ_IND_INDEX__FORCE_READ_MASK));
5242         return RREG32(mmSQ_IND_DATA);
5243 }
5244
5245 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5246                            uint32_t wave, uint32_t thread,
5247                            uint32_t regno, uint32_t num, uint32_t *out)
5248 {
5249         WREG32(mmSQ_IND_INDEX,
5250                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5251                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5252                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5253                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5254                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5255                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5256         while (num--)
5257                 *(out++) = RREG32(mmSQ_IND_DATA);
5258 }
5259
5260 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5261 {
5262         /* type 0 wave data */
5263         dst[(*no_fields)++] = 0;
5264         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5265         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5266         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5267         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5268         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5269         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5270         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5271         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5272         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5273         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5274         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5275         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5276         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5277         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5278         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5279         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5280         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5281         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5282         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
5283 }
5284
/*
 * Read @size SGPR values of the given @simd / @wave into @dst.
 *
 * Thin wrapper around wave_read_regs(): the thread argument is 0 and the
 * register number is @start offset by SQIND_WAVE_SGPRS_OFFSET.
 */
static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
5293
5294
5295 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5296         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5297         .select_se_sh = &gfx_v8_0_select_se_sh,
5298         .read_wave_data = &gfx_v8_0_read_wave_data,
5299         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5300         .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5301 };
5302
5303 static int gfx_v8_0_early_init(void *handle)
5304 {
5305         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5306
5307         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5308         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5309                                           AMDGPU_MAX_COMPUTE_RINGS);
5310         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5311         gfx_v8_0_set_ring_funcs(adev);
5312         gfx_v8_0_set_irq_funcs(adev);
5313         gfx_v8_0_set_gds_init(adev);
5314         gfx_v8_0_set_rlc_funcs(adev);
5315
5316         return 0;
5317 }
5318
/*
 * late_init hook for the GFX v8 block.
 *
 * Takes references on the priv_reg and priv_inst interrupts, runs the EDC
 * GPR workarounds, and then takes references on the cp_ecc_error and sq
 * interrupts.
 *
 * Returns 0 on success or the first error encountered.  On failure the
 * interrupt references already taken are not dropped here.
 * NOTE(review): gfx_v8_0_hw_fini() drops all four references
 * unconditionally, so adding an unwind here would need to be coordinated
 * with that path - confirm before changing.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
	if (r) {
		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
		return r;
	}

	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
	if (r) {
		DRM_ERROR(
			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
			r);
		return r;
	}

	return 0;
}
5353
5354 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5355                                                        bool enable)
5356 {
5357         if ((adev->asic_type == CHIP_POLARIS11) ||
5358             (adev->asic_type == CHIP_POLARIS12) ||
5359             (adev->asic_type == CHIP_VEGAM))
5360                 /* Send msg to SMU via Powerplay */
5361                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5362
5363         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5364 }
5365
/* Set RLC_PG_CNTL.DYN_PER_CU_PG_ENABLE to 1 when @enable is true, else 0. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5371
/* Set RLC_PG_CNTL.QUICK_PG_ENABLE to 1 when @enable is true, else 0. */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5377
/* Set RLC_PG_CNTL.GFX_POWER_GATING_ENABLE to 1 when @enable is true, else 0. */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5383
/*
 * Set RLC_PG_CNTL.GFX_PIPELINE_PG_ENABLE to 1 when @enable is true, else
 * 0.  When disabling, DB_RENDER_CONTROL is additionally read and the
 * value discarded.
 */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5393
5394 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5395                                           bool enable)
5396 {
5397         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5398                 cz_enable_gfx_cg_power_gating(adev, true);
5399                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5400                         cz_enable_gfx_pipeline_power_gating(adev, true);
5401         } else {
5402                 cz_enable_gfx_cg_power_gating(adev, false);
5403                 cz_enable_gfx_pipeline_power_gating(adev, false);
5404         }
5405 }
5406
5407 static int gfx_v8_0_set_powergating_state(void *handle,
5408                                           enum amd_powergating_state state)
5409 {
5410         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5411         bool enable = (state == AMD_PG_STATE_GATE);
5412
5413         if (amdgpu_sriov_vf(adev))
5414                 return 0;
5415
5416         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5417                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5418                                 AMD_PG_SUPPORT_CP |
5419                                 AMD_PG_SUPPORT_GFX_DMG))
5420                 amdgpu_gfx_rlc_enter_safe_mode(adev);
5421         switch (adev->asic_type) {
5422         case CHIP_CARRIZO:
5423         case CHIP_STONEY:
5424
5425                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5426                         cz_enable_sck_slow_down_on_power_up(adev, true);
5427                         cz_enable_sck_slow_down_on_power_down(adev, true);
5428                 } else {
5429                         cz_enable_sck_slow_down_on_power_up(adev, false);
5430                         cz_enable_sck_slow_down_on_power_down(adev, false);
5431                 }
5432                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5433                         cz_enable_cp_power_gating(adev, true);
5434                 else
5435                         cz_enable_cp_power_gating(adev, false);
5436
5437                 cz_update_gfx_cg_power_gating(adev, enable);
5438
5439                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5440                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5441                 else
5442                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5443
5444                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5445                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5446                 else
5447                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5448                 break;
5449         case CHIP_POLARIS11:
5450         case CHIP_POLARIS12:
5451         case CHIP_VEGAM:
5452                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5453                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5454                 else
5455                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5456
5457                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5458                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5459                 else
5460                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5461
5462                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5463                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5464                 else
5465                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5466                 break;
5467         default:
5468                 break;
5469         }
5470         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5471                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5472                                 AMD_PG_SUPPORT_CP |
5473                                 AMD_PG_SUPPORT_GFX_DMG))
5474                 amdgpu_gfx_rlc_exit_safe_mode(adev);
5475         return 0;
5476 }
5477
/*
 * Report the GFX clock gating features that are currently active.
 *
 * @handle: the amdgpu_device, passed as an opaque pointer
 * @flags:  AMD_CG_SUPPORT_GFX_* bits are OR'ed into *@flags
 *
 * For an SR-IOV VF *@flags is cleared first; the register checks below
 * still run afterwards.
 * NOTE(review): there is no early return in the SR-IOV case - confirm
 * that this is intended.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG: reported when the CPF override bit is clear */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS (same register as CGCG) */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS: reported when the override bit is clear */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS: reported when the LS override bit is clear */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS (also reports MGLS) */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS (also reports MGLS) */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5519
5520 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5521                                      uint32_t reg_addr, uint32_t cmd)
5522 {
5523         uint32_t data;
5524
5525         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5526
5527         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5528         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5529
5530         data = RREG32(mmRLC_SERDES_WR_CTRL);
5531         if (adev->asic_type == CHIP_STONEY)
5532                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5533                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5534                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5535                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5536                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5537                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5538                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5539                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5540                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5541         else
5542                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5543                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5544                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5545                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5546                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5547                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5548                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5549                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5550                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5551                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5552                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5553         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5554                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5555                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5556                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5557
5558         WREG32(mmRLC_SERDES_WR_CTRL, data);
5559 }
5560
/*
 * RLC safe-mode message codes (1 = enter, 0 = exit) and the field layout of
 * RLC_GPR_REG2: REQ is bit 0 (mask 0x1, shift 0) and MESSAGE is bits 1..4
 * (mask 0x1e, shift 1).
 * NOTE(review): the safe-mode helpers that follow use the RLC_SAFE_MODE
 * register fields rather than these definitions; they may be unused in this
 * file - verify before relying on or removing them.
 */
5561 #define MSG_ENTER_RLC_SAFE_MODE     1
5562 #define MSG_EXIT_RLC_SAFE_MODE      0
5563 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5564 #define RLC_GPR_REG2__REQ__SHIFT 0
5565 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5566 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5567
5568 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5569 {
5570         uint32_t rlc_setting;
5571
5572         rlc_setting = RREG32(mmRLC_CNTL);
5573         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5574                 return false;
5575
5576         return true;
5577 }
5578
5579 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5580 {
5581         uint32_t data;
5582         unsigned i;
5583         data = RREG32(mmRLC_CNTL);
5584         data |= RLC_SAFE_MODE__CMD_MASK;
5585         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5586         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5587         WREG32(mmRLC_SAFE_MODE, data);
5588
5589         /* wait for RLC_SAFE_MODE */
5590         for (i = 0; i < adev->usec_timeout; i++) {
5591                 if ((RREG32(mmRLC_GPM_STAT) &
5592                      (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5593                       RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5594                     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5595                      RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5596                         break;
5597                 udelay(1);
5598         }
5599         for (i = 0; i < adev->usec_timeout; i++) {
5600                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5601                         break;
5602                 udelay(1);
5603         }
5604 }
5605
5606 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5607 {
5608         uint32_t data;
5609         unsigned i;
5610
5611         data = RREG32(mmRLC_CNTL);
5612         data |= RLC_SAFE_MODE__CMD_MASK;
5613         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5614         WREG32(mmRLC_SAFE_MODE, data);
5615
5616         for (i = 0; i < adev->usec_timeout; i++) {
5617                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5618                         break;
5619                 udelay(1);
5620         }
5621 }
5622
5623 static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5624 {
5625         u32 data;
5626
5627         if (amdgpu_sriov_is_pp_one_vf(adev))
5628                 data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5629         else
5630                 data = RREG32(mmRLC_SPM_VMID);
5631
5632         data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5633         data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5634
5635         if (amdgpu_sriov_is_pp_one_vf(adev))
5636                 WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5637         else
5638                 WREG32(mmRLC_SPM_VMID, data);
5639 }
5640
5641 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5642         .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5643         .set_safe_mode = gfx_v8_0_set_safe_mode,
5644         .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5645         .init = gfx_v8_0_rlc_init,
5646         .get_csb_size = gfx_v8_0_get_csb_size,
5647         .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5648         .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5649         .resume = gfx_v8_0_rlc_resume,
5650         .stop = gfx_v8_0_rlc_stop,
5651         .reset = gfx_v8_0_rlc_reset,
5652         .start = gfx_v8_0_rlc_start,
5653         .update_spm_vmid = gfx_v8_0_update_spm_vmid
5654 };
5655
5656 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5657                                                       bool enable)
5658 {
5659         uint32_t temp, data;
5660
5661         amdgpu_gfx_rlc_enter_safe_mode(adev);
5662
5663         /* It is disabled by HW by default */
5664         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5665                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5666                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5667                                 /* 1 - RLC memory Light sleep */
5668                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5669
5670                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5671                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5672                 }
5673
5674                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5675                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5676                 if (adev->flags & AMD_IS_APU)
5677                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5678                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5679                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5680                 else
5681                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5682                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5683                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5684                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5685
5686                 if (temp != data)
5687                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5688
5689                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5690                 gfx_v8_0_wait_for_rlc_serdes(adev);
5691
5692                 /* 5 - clear mgcg override */
5693                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5694
5695                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5696                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5697                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5698                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5699                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5700                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5701                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5702                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5703                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5704                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5705                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5706                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5707                         if (temp != data)
5708                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5709                 }
5710                 udelay(50);
5711
5712                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5713                 gfx_v8_0_wait_for_rlc_serdes(adev);
5714         } else {
5715                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5716                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5717                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5718                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5719                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5720                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5721                 if (temp != data)
5722                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5723
5724                 /* 2 - disable MGLS in RLC */
5725                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5726                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5727                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5728                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5729                 }
5730
5731                 /* 3 - disable MGLS in CP */
5732                 data = RREG32(mmCP_MEM_SLP_CNTL);
5733                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5734                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5735                         WREG32(mmCP_MEM_SLP_CNTL, data);
5736                 }
5737
5738                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5739                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5740                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5741                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5742                 if (temp != data)
5743                         WREG32(mmCGTS_SM_CTRL_REG, data);
5744
5745                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5746                 gfx_v8_0_wait_for_rlc_serdes(adev);
5747
5748                 /* 6 - set mgcg override */
5749                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5750
5751                 udelay(50);
5752
5753                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5754                 gfx_v8_0_wait_for_rlc_serdes(adev);
5755         }
5756
5757         amdgpu_gfx_rlc_exit_safe_mode(adev);
5758 }
5759
5760 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5761                                                       bool enable)
5762 {
5763         uint32_t temp, temp1, data, data1;
5764
5765         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5766
5767         amdgpu_gfx_rlc_enter_safe_mode(adev);
5768
5769         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5770                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5771                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5772                 if (temp1 != data1)
5773                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5774
5775                 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5776                 gfx_v8_0_wait_for_rlc_serdes(adev);
5777
5778                 /* 2 - clear cgcg override */
5779                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5780
5781                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5782                 gfx_v8_0_wait_for_rlc_serdes(adev);
5783
5784                 /* 3 - write cmd to set CGLS */
5785                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5786
5787                 /* 4 - enable cgcg */
5788                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5789
5790                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5791                         /* enable cgls*/
5792                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5793
5794                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5795                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5796
5797                         if (temp1 != data1)
5798                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5799                 } else {
5800                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5801                 }
5802
5803                 if (temp != data)
5804                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5805
5806                 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5807                  * Cmp_busy/GFX_Idle interrupts
5808                  */
5809                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5810         } else {
5811                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5812                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5813
5814                 /* TEST CGCG */
5815                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5816                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5817                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5818                 if (temp1 != data1)
5819                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5820
5821                 /* read gfx register to wake up cgcg */
5822                 RREG32(mmCB_CGTT_SCLK_CTRL);
5823                 RREG32(mmCB_CGTT_SCLK_CTRL);
5824                 RREG32(mmCB_CGTT_SCLK_CTRL);
5825                 RREG32(mmCB_CGTT_SCLK_CTRL);
5826
5827                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5828                 gfx_v8_0_wait_for_rlc_serdes(adev);
5829
5830                 /* write cmd to Set CGCG Overrride */
5831                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5832
5833                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5834                 gfx_v8_0_wait_for_rlc_serdes(adev);
5835
5836                 /* write cmd to Clear CGLS */
5837                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5838
5839                 /* disable cgcg, cgls should be disabled too. */
5840                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5841                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5842                 if (temp != data)
5843                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5844                 /* enable interrupts again for PG */
5845                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5846         }
5847
5848         gfx_v8_0_wait_for_rlc_serdes(adev);
5849
5850         amdgpu_gfx_rlc_exit_safe_mode(adev);
5851 }
/*
 * Apply medium and coarse grain clock gating in the required order.
 *
 * Enabling programs MGCG/MGLS/TS(CG/LS) first and CGCG/CGLS afterwards;
 * disabling runs the two steps in the opposite order.
 *
 * Always returns 0.
 */
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (!enable) {
		/* CGCG/CGLS must go away before MGCG/MGLS/TS(CG/LS) */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		return 0;
	}

	/* CGCG/CGLS must come up after MGCG/MGLS/TS(CG/LS) */
	gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	return 0;
}
5870
5871 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5872                                           enum amd_clockgating_state state)
5873 {
5874         uint32_t msg_id, pp_state = 0;
5875         uint32_t pp_support_state = 0;
5876
5877         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5878                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5879                         pp_support_state = PP_STATE_SUPPORT_LS;
5880                         pp_state = PP_STATE_LS;
5881                 }
5882                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5883                         pp_support_state |= PP_STATE_SUPPORT_CG;
5884                         pp_state |= PP_STATE_CG;
5885                 }
5886                 if (state == AMD_CG_STATE_UNGATE)
5887                         pp_state = 0;
5888
5889                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5890                                 PP_BLOCK_GFX_CG,
5891                                 pp_support_state,
5892                                 pp_state);
5893                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5894         }
5895
5896         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5897                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5898                         pp_support_state = PP_STATE_SUPPORT_LS;
5899                         pp_state = PP_STATE_LS;
5900                 }
5901
5902                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5903                         pp_support_state |= PP_STATE_SUPPORT_CG;
5904                         pp_state |= PP_STATE_CG;
5905                 }
5906
5907                 if (state == AMD_CG_STATE_UNGATE)
5908                         pp_state = 0;
5909
5910                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5911                                 PP_BLOCK_GFX_MG,
5912                                 pp_support_state,
5913                                 pp_state);
5914                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5915         }
5916
5917         return 0;
5918 }
5919
5920 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5921                                           enum amd_clockgating_state state)
5922 {
5923
5924         uint32_t msg_id, pp_state = 0;
5925         uint32_t pp_support_state = 0;
5926
5927         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5928                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5929                         pp_support_state = PP_STATE_SUPPORT_LS;
5930                         pp_state = PP_STATE_LS;
5931                 }
5932                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5933                         pp_support_state |= PP_STATE_SUPPORT_CG;
5934                         pp_state |= PP_STATE_CG;
5935                 }
5936                 if (state == AMD_CG_STATE_UNGATE)
5937                         pp_state = 0;
5938
5939                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5940                                 PP_BLOCK_GFX_CG,
5941                                 pp_support_state,
5942                                 pp_state);
5943                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5944         }
5945
5946         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5947                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5948                         pp_support_state = PP_STATE_SUPPORT_LS;
5949                         pp_state = PP_STATE_LS;
5950                 }
5951                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5952                         pp_support_state |= PP_STATE_SUPPORT_CG;
5953                         pp_state |= PP_STATE_CG;
5954                 }
5955                 if (state == AMD_CG_STATE_UNGATE)
5956                         pp_state = 0;
5957
5958                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5959                                 PP_BLOCK_GFX_3D,
5960                                 pp_support_state,
5961                                 pp_state);
5962                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5963         }
5964
5965         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5966                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5967                         pp_support_state = PP_STATE_SUPPORT_LS;
5968                         pp_state = PP_STATE_LS;
5969                 }
5970
5971                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5972                         pp_support_state |= PP_STATE_SUPPORT_CG;
5973                         pp_state |= PP_STATE_CG;
5974                 }
5975
5976                 if (state == AMD_CG_STATE_UNGATE)
5977                         pp_state = 0;
5978
5979                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5980                                 PP_BLOCK_GFX_MG,
5981                                 pp_support_state,
5982                                 pp_state);
5983                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5984         }
5985
5986         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5987                 pp_support_state = PP_STATE_SUPPORT_LS;
5988
5989                 if (state == AMD_CG_STATE_UNGATE)
5990                         pp_state = 0;
5991                 else
5992                         pp_state = PP_STATE_LS;
5993
5994                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5995                                 PP_BLOCK_GFX_RLC,
5996                                 pp_support_state,
5997                                 pp_state);
5998                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5999         }
6000
6001         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6002                 pp_support_state = PP_STATE_SUPPORT_LS;
6003
6004                 if (state == AMD_CG_STATE_UNGATE)
6005                         pp_state = 0;
6006                 else
6007                         pp_state = PP_STATE_LS;
6008                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6009                         PP_BLOCK_GFX_CP,
6010                         pp_support_state,
6011                         pp_state);
6012                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6013         }
6014
6015         return 0;
6016 }
6017
6018 static int gfx_v8_0_set_clockgating_state(void *handle,
6019                                           enum amd_clockgating_state state)
6020 {
6021         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6022
6023         if (amdgpu_sriov_vf(adev))
6024                 return 0;
6025
6026         switch (adev->asic_type) {
6027         case CHIP_FIJI:
6028         case CHIP_CARRIZO:
6029         case CHIP_STONEY:
6030                 gfx_v8_0_update_gfx_clock_gating(adev,
6031                                                  state == AMD_CG_STATE_GATE);
6032                 break;
6033         case CHIP_TONGA:
6034                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6035                 break;
6036         case CHIP_POLARIS10:
6037         case CHIP_POLARIS11:
6038         case CHIP_POLARIS12:
6039         case CHIP_VEGAM:
6040                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6041                 break;
6042         default:
6043                 break;
6044         }
6045         return 0;
6046 }
6047
6048 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6049 {
6050         return ring->adev->wb.wb[ring->rptr_offs];
6051 }
6052
6053 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6054 {
6055         struct amdgpu_device *adev = ring->adev;
6056
6057         if (ring->use_doorbell)
6058                 /* XXX check if swapping is necessary on BE */
6059                 return ring->adev->wb.wb[ring->wptr_offs];
6060         else
6061                 return RREG32(mmCP_RB0_WPTR);
6062 }
6063
6064 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6065 {
6066         struct amdgpu_device *adev = ring->adev;
6067
6068         if (ring->use_doorbell) {
6069                 /* XXX check if swapping is necessary on BE */
6070                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6071                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6072         } else {
6073                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6074                 (void)RREG32(mmCP_RB0_WPTR);
6075         }
6076 }
6077
6078 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6079 {
6080         u32 ref_and_mask, reg_mem_engine;
6081
6082         if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6083             (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6084                 switch (ring->me) {
6085                 case 1:
6086                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6087                         break;
6088                 case 2:
6089                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6090                         break;
6091                 default:
6092                         return;
6093                 }
6094                 reg_mem_engine = 0;
6095         } else {
6096                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6097                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6098         }
6099
6100         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6101         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6102                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6103                                  reg_mem_engine));
6104         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6105         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6106         amdgpu_ring_write(ring, ref_and_mask);
6107         amdgpu_ring_write(ring, ref_and_mask);
6108         amdgpu_ring_write(ring, 0x20); /* poll interval */
6109 }
6110
6111 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6112 {
6113         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6114         amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6115                 EVENT_INDEX(4));
6116
6117         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6118         amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6119                 EVENT_INDEX(0));
6120 }
6121
6122 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6123                                         struct amdgpu_job *job,
6124                                         struct amdgpu_ib *ib,
6125                                         uint32_t flags)
6126 {
6127         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6128         u32 header, control = 0;
6129
6130         if (ib->flags & AMDGPU_IB_FLAG_CE)
6131                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6132         else
6133                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6134
6135         control |= ib->length_dw | (vmid << 24);
6136
6137         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6138                 control |= INDIRECT_BUFFER_PRE_ENB(1);
6139
6140                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6141                         gfx_v8_0_ring_emit_de_meta(ring);
6142         }
6143
6144         amdgpu_ring_write(ring, header);
6145         amdgpu_ring_write(ring,
6146 #ifdef __BIG_ENDIAN
6147                           (2 << 0) |
6148 #endif
6149                           (ib->gpu_addr & 0xFFFFFFFC));
6150         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6151         amdgpu_ring_write(ring, control);
6152 }
6153
6154 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6155                                           struct amdgpu_job *job,
6156                                           struct amdgpu_ib *ib,
6157                                           uint32_t flags)
6158 {
6159         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6160         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6161
6162         /* Currently, there is a high possibility to get wave ID mismatch
6163          * between ME and GDS, leading to a hw deadlock, because ME generates
6164          * different wave IDs than the GDS expects. This situation happens
6165          * randomly when at least 5 compute pipes use GDS ordered append.
6166          * The wave IDs generated by ME are also wrong after suspend/resume.
6167          * Those are probably bugs somewhere else in the kernel driver.
6168          *
6169          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6170          * GDS to 0 for this ring (me/pipe).
6171          */
6172         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6173                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6174                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6175                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6176         }
6177
6178         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6179         amdgpu_ring_write(ring,
6180 #ifdef __BIG_ENDIAN
6181                                 (2 << 0) |
6182 #endif
6183                                 (ib->gpu_addr & 0xFFFFFFFC));
6184         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6185         amdgpu_ring_write(ring, control);
6186 }
6187
6188 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6189                                          u64 seq, unsigned flags)
6190 {
6191         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6192         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6193
6194         /* Workaround for cache flush problems. First send a dummy EOP
6195          * event down the pipe with seq one below.
6196          */
6197         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6198         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6199                                  EOP_TC_ACTION_EN |
6200                                  EOP_TC_WB_ACTION_EN |
6201                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6202                                  EVENT_INDEX(5)));
6203         amdgpu_ring_write(ring, addr & 0xfffffffc);
6204         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6205                                 DATA_SEL(1) | INT_SEL(0));
6206         amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6207         amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6208
6209         /* Then send the real EOP event down the pipe:
6210          * EVENT_WRITE_EOP - flush caches, send int */
6211         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6212         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6213                                  EOP_TC_ACTION_EN |
6214                                  EOP_TC_WB_ACTION_EN |
6215                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6216                                  EVENT_INDEX(5)));
6217         amdgpu_ring_write(ring, addr & 0xfffffffc);
6218         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6219                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6220         amdgpu_ring_write(ring, lower_32_bits(seq));
6221         amdgpu_ring_write(ring, upper_32_bits(seq));
6222
6223 }
6224
6225 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6226 {
6227         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6228         uint32_t seq = ring->fence_drv.sync_seq;
6229         uint64_t addr = ring->fence_drv.gpu_addr;
6230
6231         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6232         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6233                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6234                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6235         amdgpu_ring_write(ring, addr & 0xfffffffc);
6236         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6237         amdgpu_ring_write(ring, seq);
6238         amdgpu_ring_write(ring, 0xffffffff);
6239         amdgpu_ring_write(ring, 4); /* poll interval */
6240 }
6241
6242 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6243                                         unsigned vmid, uint64_t pd_addr)
6244 {
6245         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6246
6247         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6248
6249         /* wait for the invalidate to complete */
6250         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6251         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6252                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6253                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6254         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6255         amdgpu_ring_write(ring, 0);
6256         amdgpu_ring_write(ring, 0); /* ref */
6257         amdgpu_ring_write(ring, 0); /* mask */
6258         amdgpu_ring_write(ring, 0x20); /* poll interval */
6259
6260         /* compute doesn't have PFP */
6261         if (usepfp) {
6262                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6263                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6264                 amdgpu_ring_write(ring, 0x0);
6265         }
6266 }
6267
6268 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6269 {
6270         return ring->adev->wb.wb[ring->wptr_offs];
6271 }
6272
6273 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6274 {
6275         struct amdgpu_device *adev = ring->adev;
6276
6277         /* XXX check if swapping is necessary on BE */
6278         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6279         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6280 }
6281
6282 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6283                                              u64 addr, u64 seq,
6284                                              unsigned flags)
6285 {
6286         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6287         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6288
6289         /* RELEASE_MEM - flush caches, send int */
6290         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6291         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6292                                  EOP_TC_ACTION_EN |
6293                                  EOP_TC_WB_ACTION_EN |
6294                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6295                                  EVENT_INDEX(5)));
6296         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6297         amdgpu_ring_write(ring, addr & 0xfffffffc);
6298         amdgpu_ring_write(ring, upper_32_bits(addr));
6299         amdgpu_ring_write(ring, lower_32_bits(seq));
6300         amdgpu_ring_write(ring, upper_32_bits(seq));
6301 }
6302
6303 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6304                                          u64 seq, unsigned int flags)
6305 {
6306         /* we only allocate 32bit for each seq wb address */
6307         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6308
6309         /* write fence seq to the "addr" */
6310         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6311         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6312                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6313         amdgpu_ring_write(ring, lower_32_bits(addr));
6314         amdgpu_ring_write(ring, upper_32_bits(addr));
6315         amdgpu_ring_write(ring, lower_32_bits(seq));
6316
6317         if (flags & AMDGPU_FENCE_FLAG_INT) {
6318                 /* set register to trigger INT */
6319                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6320                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6321                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6322                 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6323                 amdgpu_ring_write(ring, 0);
6324                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6325         }
6326 }
6327
6328 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6329 {
6330         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6331         amdgpu_ring_write(ring, 0);
6332 }
6333
6334 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6335 {
6336         uint32_t dw2 = 0;
6337
6338         if (amdgpu_sriov_vf(ring->adev))
6339                 gfx_v8_0_ring_emit_ce_meta(ring);
6340
6341         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6342         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6343                 gfx_v8_0_ring_emit_vgt_flush(ring);
6344                 /* set load_global_config & load_global_uconfig */
6345                 dw2 |= 0x8001;
6346                 /* set load_cs_sh_regs */
6347                 dw2 |= 0x01000000;
6348                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6349                 dw2 |= 0x10002;
6350
6351                 /* set load_ce_ram if preamble presented */
6352                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6353                         dw2 |= 0x10000000;
6354         } else {
6355                 /* still load_ce_ram if this is the first time preamble presented
6356                  * although there is no context switch happens.
6357                  */
6358                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6359                         dw2 |= 0x10000000;
6360         }
6361
6362         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6363         amdgpu_ring_write(ring, dw2);
6364         amdgpu_ring_write(ring, 0);
6365 }
6366
6367 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6368 {
6369         unsigned ret;
6370
6371         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6372         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6373         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6374         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6375         ret = ring->wptr & ring->buf_mask;
6376         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6377         return ret;
6378 }
6379
6380 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6381 {
6382         unsigned cur;
6383
6384         BUG_ON(offset > ring->buf_mask);
6385         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6386
6387         cur = (ring->wptr & ring->buf_mask) - 1;
6388         if (likely(cur > offset))
6389                 ring->ring[offset] = cur - offset;
6390         else
6391                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6392 }
6393
6394 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6395                                     uint32_t reg_val_offs)
6396 {
6397         struct amdgpu_device *adev = ring->adev;
6398
6399         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6400         amdgpu_ring_write(ring, 0 |     /* src: register*/
6401                                 (5 << 8) |      /* dst: memory */
6402                                 (1 << 20));     /* write confirm */
6403         amdgpu_ring_write(ring, reg);
6404         amdgpu_ring_write(ring, 0);
6405         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6406                                 reg_val_offs * 4));
6407         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6408                                 reg_val_offs * 4));
6409 }
6410
6411 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6412                                   uint32_t val)
6413 {
6414         uint32_t cmd;
6415
6416         switch (ring->funcs->type) {
6417         case AMDGPU_RING_TYPE_GFX:
6418                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6419                 break;
6420         case AMDGPU_RING_TYPE_KIQ:
6421                 cmd = 1 << 16; /* no inc addr */
6422                 break;
6423         default:
6424                 cmd = WR_CONFIRM;
6425                 break;
6426         }
6427
6428         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6429         amdgpu_ring_write(ring, cmd);
6430         amdgpu_ring_write(ring, reg);
6431         amdgpu_ring_write(ring, 0);
6432         amdgpu_ring_write(ring, val);
6433 }
6434
6435 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6436 {
6437         struct amdgpu_device *adev = ring->adev;
6438         uint32_t value = 0;
6439
6440         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6441         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6442         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6443         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6444         WREG32(mmSQ_CMD, value);
6445 }
6446
6447 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6448                                                  enum amdgpu_interrupt_state state)
6449 {
6450         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6451                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6452 }
6453
6454 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6455                                                      int me, int pipe,
6456                                                      enum amdgpu_interrupt_state state)
6457 {
6458         u32 mec_int_cntl, mec_int_cntl_reg;
6459
6460         /*
6461          * amdgpu controls only the first MEC. That's why this function only
6462          * handles the setting of interrupts for this specific MEC. All other
6463          * pipes' interrupts are set by amdkfd.
6464          */
6465
6466         if (me == 1) {
6467                 switch (pipe) {
6468                 case 0:
6469                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6470                         break;
6471                 case 1:
6472                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6473                         break;
6474                 case 2:
6475                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6476                         break;
6477                 case 3:
6478                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6479                         break;
6480                 default:
6481                         DRM_DEBUG("invalid pipe %d\n", pipe);
6482                         return;
6483                 }
6484         } else {
6485                 DRM_DEBUG("invalid me %d\n", me);
6486                 return;
6487         }
6488
6489         switch (state) {
6490         case AMDGPU_IRQ_STATE_DISABLE:
6491                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6492                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6493                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6494                 break;
6495         case AMDGPU_IRQ_STATE_ENABLE:
6496                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6497                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6498                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6499                 break;
6500         default:
6501                 break;
6502         }
6503 }
6504
6505 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6506                                              struct amdgpu_irq_src *source,
6507                                              unsigned type,
6508                                              enum amdgpu_interrupt_state state)
6509 {
6510         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6511                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6512
6513         return 0;
6514 }
6515
6516 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6517                                               struct amdgpu_irq_src *source,
6518                                               unsigned type,
6519                                               enum amdgpu_interrupt_state state)
6520 {
6521         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6522                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6523
6524         return 0;
6525 }
6526
6527 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6528                                             struct amdgpu_irq_src *src,
6529                                             unsigned type,
6530                                             enum amdgpu_interrupt_state state)
6531 {
6532         switch (type) {
6533         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6534                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6535                 break;
6536         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6537                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6538                 break;
6539         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6540                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6541                 break;
6542         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6543                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6544                 break;
6545         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6546                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6547                 break;
6548         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6549                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6550                 break;
6551         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6552                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6553                 break;
6554         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6555                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6556                 break;
6557         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6558                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6559                 break;
6560         default:
6561                 break;
6562         }
6563         return 0;
6564 }
6565
6566 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6567                                          struct amdgpu_irq_src *source,
6568                                          unsigned int type,
6569                                          enum amdgpu_interrupt_state state)
6570 {
6571         int enable_flag;
6572
6573         switch (state) {
6574         case AMDGPU_IRQ_STATE_DISABLE:
6575                 enable_flag = 0;
6576                 break;
6577
6578         case AMDGPU_IRQ_STATE_ENABLE:
6579                 enable_flag = 1;
6580                 break;
6581
6582         default:
6583                 return -EINVAL;
6584         }
6585
6586         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6587         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6588         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6589         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6590         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6591         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6592                      enable_flag);
6593         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6594                      enable_flag);
6595         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6596                      enable_flag);
6597         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6598                      enable_flag);
6599         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6600                      enable_flag);
6601         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6602                      enable_flag);
6603         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6604                      enable_flag);
6605         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6606                      enable_flag);
6607
6608         return 0;
6609 }
6610
6611 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6612                                      struct amdgpu_irq_src *source,
6613                                      unsigned int type,
6614                                      enum amdgpu_interrupt_state state)
6615 {
6616         int enable_flag;
6617
6618         switch (state) {
6619         case AMDGPU_IRQ_STATE_DISABLE:
6620                 enable_flag = 1;
6621                 break;
6622
6623         case AMDGPU_IRQ_STATE_ENABLE:
6624                 enable_flag = 0;
6625                 break;
6626
6627         default:
6628                 return -EINVAL;
6629         }
6630
6631         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6632                      enable_flag);
6633
6634         return 0;
6635 }
6636
6637 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6638                             struct amdgpu_irq_src *source,
6639                             struct amdgpu_iv_entry *entry)
6640 {
6641         int i;
6642         u8 me_id, pipe_id, queue_id;
6643         struct amdgpu_ring *ring;
6644
6645         DRM_DEBUG("IH: CP EOP\n");
6646         me_id = (entry->ring_id & 0x0c) >> 2;
6647         pipe_id = (entry->ring_id & 0x03) >> 0;
6648         queue_id = (entry->ring_id & 0x70) >> 4;
6649
6650         switch (me_id) {
6651         case 0:
6652                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6653                 break;
6654         case 1:
6655         case 2:
6656                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6657                         ring = &adev->gfx.compute_ring[i];
6658                         /* Per-queue interrupt is supported for MEC starting from VI.
6659                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6660                           */
6661                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6662                                 amdgpu_fence_process(ring);
6663                 }
6664                 break;
6665         }
6666         return 0;
6667 }
6668
6669 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6670                            struct amdgpu_iv_entry *entry)
6671 {
6672         u8 me_id, pipe_id, queue_id;
6673         struct amdgpu_ring *ring;
6674         int i;
6675
6676         me_id = (entry->ring_id & 0x0c) >> 2;
6677         pipe_id = (entry->ring_id & 0x03) >> 0;
6678         queue_id = (entry->ring_id & 0x70) >> 4;
6679
6680         switch (me_id) {
6681         case 0:
6682                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6683                 break;
6684         case 1:
6685         case 2:
6686                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6687                         ring = &adev->gfx.compute_ring[i];
6688                         if (ring->me == me_id && ring->pipe == pipe_id &&
6689                             ring->queue == queue_id)
6690                                 drm_sched_fault(&ring->sched);
6691                 }
6692                 break;
6693         }
6694 }
6695
/*
 * Privileged register fault handler: logs the error, then hands the
 * interrupt entry to gfx_v8_0_fault(). @source is unused.
 * Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6704
/*
 * Privileged instruction fault handler: logs the error, then hands the
 * interrupt entry to gfx_v8_0_fault(). @source is unused.
 * Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6713
/*
 * CP EDC/ECC error interrupt handler: only logs the event. None of the
 * parameters are used. Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* newline-terminated like the other log messages in this file */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6721
6722 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
6723                                   bool from_wq)
6724 {
6725         u32 enc, se_id, sh_id, cu_id;
6726         char type[20];
6727         int sq_edc_source = -1;
6728
6729         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6730         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6731
6732         switch (enc) {
6733                 case 0:
6734                         DRM_INFO("SQ general purpose intr detected:"
6735                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6736                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6737                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6738                                         "wlt %d, thread_trace %d.\n",
6739                                         se_id,
6740                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6741                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6742                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6743                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6744                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6745                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6746                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6747                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6748                                         );
6749                         break;
6750                 case 1:
6751                 case 2:
6752
6753                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6754                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6755
6756                         /*
6757                          * This function can be called either directly from ISR
6758                          * or from BH in which case we can access SQ_EDC_INFO
6759                          * instance
6760                          */
6761                         if (from_wq) {
6762                                 mutex_lock(&adev->grbm_idx_mutex);
6763                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6764
6765                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6766
6767                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6768                                 mutex_unlock(&adev->grbm_idx_mutex);
6769                         }
6770
6771                         if (enc == 1)
6772                                 sprintf(type, "instruction intr");
6773                         else
6774                                 sprintf(type, "EDC/ECC error");
6775
6776                         DRM_INFO(
6777                                 "SQ %s detected: "
6778                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6779                                         "trap %s, sq_ed_info.source %s.\n",
6780                                         type, se_id, sh_id, cu_id,
6781                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6782                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6783                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6784                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6785                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6786                                 );
6787                         break;
6788                 default:
6789                         DRM_ERROR("SQ invalid encoding type\n.");
6790         }
6791 }
6792
6793 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6794 {
6795
6796         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6797         struct sq_work *sq_work = container_of(work, struct sq_work, work);
6798
6799         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
6800 }
6801
6802 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6803                            struct amdgpu_irq_src *source,
6804                            struct amdgpu_iv_entry *entry)
6805 {
6806         unsigned ih_data = entry->src_data[0];
6807
6808         /*
6809          * Try to submit work so SQ_EDC_INFO can be accessed from
6810          * BH. If previous work submission hasn't finished yet
6811          * just print whatever info is possible directly from the ISR.
6812          */
6813         if (work_pending(&adev->gfx.sq_work.work)) {
6814                 gfx_v8_0_parse_sq_irq(adev, ih_data, false);
6815         } else {
6816                 adev->gfx.sq_work.ih_data = ih_data;
6817                 schedule_work(&adev->gfx.sq_work.work);
6818         }
6819
6820         return 0;
6821 }
6822
6823 static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6824 {
6825         amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6826         amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6827                           PACKET3_TC_ACTION_ENA |
6828                           PACKET3_SH_KCACHE_ACTION_ENA |
6829                           PACKET3_SH_ICACHE_ACTION_ENA |
6830                           PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6831         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6832         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
6833         amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6834 }
6835
6836 static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6837 {
6838         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6839         amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6840                           PACKET3_TC_ACTION_ENA |
6841                           PACKET3_SH_KCACHE_ACTION_ENA |
6842                           PACKET3_SH_ICACHE_ACTION_ENA |
6843                           PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6844         amdgpu_ring_write(ring, 0xffffffff);    /* CP_COHER_SIZE */
6845         amdgpu_ring_write(ring, 0xff);          /* CP_COHER_SIZE_HI */
6846         amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE */
6847         amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE_HI */
6848         amdgpu_ring_write(ring, 0x0000000A);    /* poll interval */
6849 }
6850
6851
6852 /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6853 #define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT       0x0000007f
6854 static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6855                                         uint32_t pipe, bool enable)
6856 {
6857         uint32_t val;
6858         uint32_t wcl_cs_reg;
6859
6860         val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
6861
6862         switch (pipe) {
6863         case 0:
6864                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
6865                 break;
6866         case 1:
6867                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
6868                 break;
6869         case 2:
6870                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
6871                 break;
6872         case 3:
6873                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
6874                 break;
6875         default:
6876                 DRM_DEBUG("invalid pipe %d\n", pipe);
6877                 return;
6878         }
6879
6880         amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6881
6882 }
6883
6884 #define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT      0x07ffffff
6885 static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6886 {
6887         struct amdgpu_device *adev = ring->adev;
6888         uint32_t val;
6889         int i;
6890
6891         /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6892          * number of gfx waves. Setting 5 bit will make sure gfx only gets
6893          * around 25% of gpu resources.
6894          */
6895         val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6896         amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
6897
6898         /* Restrict waves for normal/low priority compute queues as well
6899          * to get best QoS for high priority compute jobs.
6900          *
6901          * amdgpu controls only 1st ME(0-3 CS pipes).
6902          */
6903         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6904                 if (i != ring->pipe)
6905                         gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
6906
6907         }
6908
6909 }
6910
6911 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6912         .name = "gfx_v8_0",
6913         .early_init = gfx_v8_0_early_init,
6914         .late_init = gfx_v8_0_late_init,
6915         .sw_init = gfx_v8_0_sw_init,
6916         .sw_fini = gfx_v8_0_sw_fini,
6917         .hw_init = gfx_v8_0_hw_init,
6918         .hw_fini = gfx_v8_0_hw_fini,
6919         .suspend = gfx_v8_0_suspend,
6920         .resume = gfx_v8_0_resume,
6921         .is_idle = gfx_v8_0_is_idle,
6922         .wait_for_idle = gfx_v8_0_wait_for_idle,
6923         .check_soft_reset = gfx_v8_0_check_soft_reset,
6924         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6925         .soft_reset = gfx_v8_0_soft_reset,
6926         .post_soft_reset = gfx_v8_0_post_soft_reset,
6927         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6928         .set_powergating_state = gfx_v8_0_set_powergating_state,
6929         .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6930 };
6931
6932 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6933         .type = AMDGPU_RING_TYPE_GFX,
6934         .align_mask = 0xff,
6935         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6936         .support_64bit_ptrs = false,
6937         .get_rptr = gfx_v8_0_ring_get_rptr,
6938         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6939         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6940         .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6941                 5 +  /* COND_EXEC */
6942                 7 +  /* PIPELINE_SYNC */
6943                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6944                 12 +  /* FENCE for VM_FLUSH */
6945                 20 + /* GDS switch */
6946                 4 + /* double SWITCH_BUFFER,
6947                        the first COND_EXEC jump to the place just
6948                            prior to this double SWITCH_BUFFER  */
6949                 5 + /* COND_EXEC */
6950                 7 +      /*     HDP_flush */
6951                 4 +      /*     VGT_flush */
6952                 14 + /* CE_META */
6953                 31 + /* DE_META */
6954                 3 + /* CNTX_CTRL */
6955                 5 + /* HDP_INVL */
6956                 12 + 12 + /* FENCE x2 */
6957                 2 + /* SWITCH_BUFFER */
6958                 5, /* SURFACE_SYNC */
6959         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6960         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6961         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6962         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6963         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6964         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6965         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6966         .test_ring = gfx_v8_0_ring_test_ring,
6967         .test_ib = gfx_v8_0_ring_test_ib,
6968         .insert_nop = amdgpu_ring_insert_nop,
6969         .pad_ib = amdgpu_ring_generic_pad_ib,
6970         .emit_switch_buffer = gfx_v8_ring_emit_sb,
6971         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6972         .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6973         .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6974         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6975         .soft_recovery = gfx_v8_0_ring_soft_recovery,
6976         .emit_mem_sync = gfx_v8_0_emit_mem_sync,
6977 };
6978
6979 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6980         .type = AMDGPU_RING_TYPE_COMPUTE,
6981         .align_mask = 0xff,
6982         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6983         .support_64bit_ptrs = false,
6984         .get_rptr = gfx_v8_0_ring_get_rptr,
6985         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6986         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6987         .emit_frame_size =
6988                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6989                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6990                 5 + /* hdp_invalidate */
6991                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6992                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6993                 7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6994                 7 + /* gfx_v8_0_emit_mem_sync_compute */
6995                 5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6996                 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6997         .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6998         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6999         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
7000         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7001         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7002         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7003         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7004         .test_ring = gfx_v8_0_ring_test_ring,
7005         .test_ib = gfx_v8_0_ring_test_ib,
7006         .insert_nop = amdgpu_ring_insert_nop,
7007         .pad_ib = amdgpu_ring_generic_pad_ib,
7008         .emit_wreg = gfx_v8_0_ring_emit_wreg,
7009         .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
7010         .emit_wave_limit = gfx_v8_0_emit_wave_limit,
7011 };
7012
7013 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7014         .type = AMDGPU_RING_TYPE_KIQ,
7015         .align_mask = 0xff,
7016         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7017         .support_64bit_ptrs = false,
7018         .get_rptr = gfx_v8_0_ring_get_rptr,
7019         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7020         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7021         .emit_frame_size =
7022                 20 + /* gfx_v8_0_ring_emit_gds_switch */
7023                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7024                 5 + /* hdp_invalidate */
7025                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7026                 17 + /* gfx_v8_0_ring_emit_vm_flush */
7027                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7028         .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
7029         .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7030         .test_ring = gfx_v8_0_ring_test_ring,
7031         .insert_nop = amdgpu_ring_insert_nop,
7032         .pad_ib = amdgpu_ring_generic_pad_ib,
7033         .emit_rreg = gfx_v8_0_ring_emit_rreg,
7034         .emit_wreg = gfx_v8_0_ring_emit_wreg,
7035 };
7036
7037 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7038 {
7039         int i;
7040
7041         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7042
7043         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7044                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7045
7046         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7047                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7048 }
7049
7050 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7051         .set = gfx_v8_0_set_eop_interrupt_state,
7052         .process = gfx_v8_0_eop_irq,
7053 };
7054
7055 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7056         .set = gfx_v8_0_set_priv_reg_fault_state,
7057         .process = gfx_v8_0_priv_reg_irq,
7058 };
7059
7060 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7061         .set = gfx_v8_0_set_priv_inst_fault_state,
7062         .process = gfx_v8_0_priv_inst_irq,
7063 };
7064
7065 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7066         .set = gfx_v8_0_set_cp_ecc_int_state,
7067         .process = gfx_v8_0_cp_ecc_error_irq,
7068 };
7069
7070 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7071         .set = gfx_v8_0_set_sq_int_state,
7072         .process = gfx_v8_0_sq_irq,
7073 };
7074
7075 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7076 {
7077         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7078         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7079
7080         adev->gfx.priv_reg_irq.num_types = 1;
7081         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7082
7083         adev->gfx.priv_inst_irq.num_types = 1;
7084         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7085
7086         adev->gfx.cp_ecc_error_irq.num_types = 1;
7087         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7088
7089         adev->gfx.sq_irq.num_types = 1;
7090         adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7091 }
7092
7093 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7094 {
7095         adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7096 }
7097
7098 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7099 {
7100         /* init asci gds info */
7101         adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7102         adev->gds.gws_size = 64;
7103         adev->gds.oa_size = 16;
7104         adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7105 }
7106
7107 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7108                                                  u32 bitmap)
7109 {
7110         u32 data;
7111
7112         if (!bitmap)
7113                 return;
7114
7115         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7116         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7117
7118         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7119 }
7120
7121 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7122 {
7123         u32 data, mask;
7124
7125         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7126                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7127
7128         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7129
7130         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7131 }
7132
7133 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7134 {
7135         int i, j, k, counter, active_cu_number = 0;
7136         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7137         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7138         unsigned disable_masks[4 * 2];
7139         u32 ao_cu_num;
7140
7141         memset(cu_info, 0, sizeof(*cu_info));
7142
7143         if (adev->flags & AMD_IS_APU)
7144                 ao_cu_num = 2;
7145         else
7146                 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7147
7148         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7149
7150         mutex_lock(&adev->grbm_idx_mutex);
7151         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7152                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7153                         mask = 1;
7154                         ao_bitmap = 0;
7155                         counter = 0;
7156                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7157                         if (i < 4 && j < 2)
7158                                 gfx_v8_0_set_user_cu_inactive_bitmap(
7159                                         adev, disable_masks[i * 2 + j]);
7160                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7161                         cu_info->bitmap[i][j] = bitmap;
7162
7163                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7164                                 if (bitmap & mask) {
7165                                         if (counter < ao_cu_num)
7166                                                 ao_bitmap |= mask;
7167                                         counter ++;
7168                                 }
7169                                 mask <<= 1;
7170                         }
7171                         active_cu_number += counter;
7172                         if (i < 2 && j < 2)
7173                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7174                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7175                 }
7176         }
7177         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7178         mutex_unlock(&adev->grbm_idx_mutex);
7179
7180         cu_info->number = active_cu_number;
7181         cu_info->ao_cu_mask = ao_cu_mask;
7182         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7183         cu_info->max_waves_per_simd = 10;
7184         cu_info->max_scratch_slots_per_cu = 32;
7185         cu_info->wave_front_size = 64;
7186         cu_info->lds_size = 64;
7187 }
7188
7189 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7190 {
7191         .type = AMD_IP_BLOCK_TYPE_GFX,
7192         .major = 8,
7193         .minor = 0,
7194         .rev = 0,
7195         .funcs = &gfx_v8_0_ip_funcs,
7196 };
7197
7198 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7199 {
7200         .type = AMD_IP_BLOCK_TYPE_GFX,
7201         .major = 8,
7202         .minor = 1,
7203         .rev = 0,
7204         .funcs = &gfx_v8_0_ip_funcs,
7205 };
7206
7207 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7208 {
7209         uint64_t ce_payload_addr;
7210         int cnt_ce;
7211         union {
7212                 struct vi_ce_ib_state regular;
7213                 struct vi_ce_ib_state_chained_ib chained;
7214         } ce_payload = {};
7215
7216         if (ring->adev->virt.chained_ib_support) {
7217                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7218                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7219                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7220         } else {
7221                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7222                         offsetof(struct vi_gfx_meta_data, ce_payload);
7223                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7224         }
7225
7226         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7227         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7228                                 WRITE_DATA_DST_SEL(8) |
7229                                 WR_CONFIRM) |
7230                                 WRITE_DATA_CACHE_POLICY(0));
7231         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7232         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7233         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7234 }
7235
7236 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7237 {
7238         uint64_t de_payload_addr, gds_addr, csa_addr;
7239         int cnt_de;
7240         union {
7241                 struct vi_de_ib_state regular;
7242                 struct vi_de_ib_state_chained_ib chained;
7243         } de_payload = {};
7244
7245         csa_addr = amdgpu_csa_vaddr(ring->adev);
7246         gds_addr = csa_addr + 4096;
7247         if (ring->adev->virt.chained_ib_support) {
7248                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7249                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7250                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7251                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7252         } else {
7253                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7254                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7255                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7256                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7257         }
7258
7259         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7260         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7261                                 WRITE_DATA_DST_SEL(8) |
7262                                 WR_CONFIRM) |
7263                                 WRITE_DATA_CACHE_POLICY(0));
7264         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7265         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7266         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7267 }