/* drm/amd: drop dependencies on drm_os_linux.h */
/* [linux-2.6-microblaze.git] drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27
28 #include <drm/drmP.h>
29 #include "amdgpu.h"
30 #include "amdgpu_gfx.h"
31 #include "vi.h"
32 #include "vi_structs.h"
33 #include "vid.h"
34 #include "amdgpu_ucode.h"
35 #include "amdgpu_atombios.h"
36 #include "atombios_i2c.h"
37 #include "clearstate_vi.h"
38
39 #include "gmc/gmc_8_2_d.h"
40 #include "gmc/gmc_8_2_sh_mask.h"
41
42 #include "oss/oss_3_0_d.h"
43 #include "oss/oss_3_0_sh_mask.h"
44
45 #include "bif/bif_5_0_d.h"
46 #include "bif/bif_5_0_sh_mask.h"
47 #include "gca/gfx_8_0_d.h"
48 #include "gca/gfx_8_0_enum.h"
49 #include "gca/gfx_8_0_sh_mask.h"
50
51 #include "dce/dce_10_0_d.h"
52 #include "dce/dce_10_0_sh_mask.h"
53
54 #include "smu/smu_7_1_3_d.h"
55
56 #include "ivsrcid/ivsrcid_vislands30.h"
57
/* GFX v8 has a single GFX ring; each MEC queue gets a 4 KiB HPD aperture. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 4096

/* Golden (vendor-validated) GB_ADDR_CONFIG values, per ASIC. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Field builders for GB_TILE_MODEn and GB_MACROTILE_MODEn register values. */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-block clockgating override bits in RLC_CGTT_MGCG_OVERRIDE. */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

/* Length of the RLC "direct register list" format header, in dwords —
 * NOTE(review): exact consumer not visible in this chunk; confirm. */
#define RLC_FormatDirectRegListLength        14
98
99 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
101 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
103 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
104 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
105
106 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
107 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
108 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
109 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
110 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
111
112 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
114 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
116 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
117 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
118
119 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
120 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
121 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
122 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
123 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
124
125 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
127 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
129 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
130 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
131
132 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
143
144 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
145 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
153 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
155
156 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
157 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
165 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
166 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
167
168 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
169 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
170 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
172 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
173 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
174
175 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
176 {
177         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
178         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
179         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
180         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
181         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
182         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
183         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
184         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
185         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
186         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
187         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
188         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
189         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
190         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
191         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
192         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
193 };
194
195 static const u32 golden_settings_tonga_a11[] =
196 {
197         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
198         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
199         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
200         mmGB_GPU_ID, 0x0000000f, 0x00000000,
201         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
202         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
203         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
204         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
205         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
206         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
207         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
208         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
209         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
210         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
211         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
212         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
213 };
214
215 static const u32 tonga_golden_common_all[] =
216 {
217         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
218         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
219         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
220         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
221         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
222         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
223         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
224         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
225 };
226
227 static const u32 tonga_mgcg_cgcg_init[] =
228 {
229         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
230         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
231         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
232         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
233         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
234         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
236         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
237         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
238         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
239         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
240         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
241         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
242         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
244         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
245         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
246         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
247         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
248         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
249         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
250         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
251         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
252         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
253         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
254         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
255         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
256         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
257         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
258         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
259         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
260         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
261         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
262         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
263         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
264         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
265         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
266         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
267         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
268         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
269         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
270         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
271         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
272         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
273         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
274         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
275         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
276         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
277         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
278         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
279         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
280         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
281         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
282         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
283         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
284         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
285         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
286         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
287         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
288         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
289         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
290         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
291         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
292         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
293         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
294         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
295         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
296         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
297         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
298         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
299         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
300         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
301         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
302         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
303         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
304 };
305
306 static const u32 golden_settings_vegam_a11[] =
307 {
308         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
309         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
310         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
311         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
312         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
313         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
314         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
315         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
316         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
317         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
318         mmSQ_CONFIG, 0x07f80000, 0x01180000,
319         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
320         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
321         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
322         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
323         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
324         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
325 };
326
327 static const u32 vegam_golden_common_all[] =
328 {
329         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
330         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
331         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
332         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
333         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
334         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
335 };
336
337 static const u32 golden_settings_polaris11_a11[] =
338 {
339         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
340         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
341         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
342         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
343         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
344         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
345         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
346         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
347         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
348         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
349         mmSQ_CONFIG, 0x07f80000, 0x01180000,
350         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
351         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
352         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
353         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
354         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
355         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
356 };
357
358 static const u32 polaris11_golden_common_all[] =
359 {
360         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
361         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
362         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
363         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
364         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
365         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
366 };
367
368 static const u32 golden_settings_polaris10_a11[] =
369 {
370         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
371         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
372         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
373         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
374         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
375         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
376         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
377         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
378         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
379         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
380         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
381         mmSQ_CONFIG, 0x07f80000, 0x07180000,
382         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
383         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
384         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
385         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
386         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
387 };
388
389 static const u32 polaris10_golden_common_all[] =
390 {
391         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
392         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
393         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
394         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
395         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
396         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
397         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
398         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
399 };
400
401 static const u32 fiji_golden_common_all[] =
402 {
403         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
404         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
405         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
406         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
407         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
408         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
409         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
410         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
411         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
412         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
413 };
414
415 static const u32 golden_settings_fiji_a10[] =
416 {
417         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
418         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
419         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
420         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
421         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
422         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
423         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
424         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
425         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
426         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
427         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
428 };
429
430 static const u32 fiji_mgcg_cgcg_init[] =
431 {
432         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
433         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
434         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
435         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
436         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
439         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
441         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
443         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
452         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
453         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
454         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
455         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
456         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
457         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
458         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
459         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
460         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
461         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
462         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
463         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
464         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
465         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
466         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
467 };
468
469 static const u32 golden_settings_iceland_a11[] =
470 {
471         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
472         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
473         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
474         mmGB_GPU_ID, 0x0000000f, 0x00000000,
475         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
476         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
477         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
478         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
479         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
480         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
481         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
482         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
483         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
484         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
485         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
486         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
487 };
488
489 static const u32 iceland_golden_common_all[] =
490 {
491         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
492         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
493         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
494         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
495         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
496         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
497         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
498         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
499 };
500
501 static const u32 iceland_mgcg_cgcg_init[] =
502 {
503         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
504         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
505         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
506         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
507         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
508         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
509         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
510         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
511         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
512         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
513         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
514         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
515         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
516         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
518         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
519         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
520         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
521         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
522         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
523         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
524         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
525         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
526         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
527         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
528         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
529         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
530         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
531         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
532         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
533         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
534         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
535         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
536         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
537         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
538         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
539         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
540         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
541         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
542         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
543         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
544         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
545         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
546         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
547         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
548         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
549         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
550         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
551         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
552         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
553         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
554         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
555         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
556         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
557         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
558         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
559         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
560         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
561         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
562         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
563         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
564         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
565         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
566         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
567 };
568
569 static const u32 cz_golden_settings_a11[] =
570 {
571         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
572         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
573         mmGB_GPU_ID, 0x0000000f, 0x00000000,
574         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
575         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
576         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
577         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
578         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
579         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
580         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
581         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
582         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
583 };
584
585 static const u32 cz_golden_common_all[] =
586 {
587         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
588         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
589         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
590         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
591         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
592         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
593         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
594         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
595 };
596
597 static const u32 cz_mgcg_cgcg_init[] =
598 {
599         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
600         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
601         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
602         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
606         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
608         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
609         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
610         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
611         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
615         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
616         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
617         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
619         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
620         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
621         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
622         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
623         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
624         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
625         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
626         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
627         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
628         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
629         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
630         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
631         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
632         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
633         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
634         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
635         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
636         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
637         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
638         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
639         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
640         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
641         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
642         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
643         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
644         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
645         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
646         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
647         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
648         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
649         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
650         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
651         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
652         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
653         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
654         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
655         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
656         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
657         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
658         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
659         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
660         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
661         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
662         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
663         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
664         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
665         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
666         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
667         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
668         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
669         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
670         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
671         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
672         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
673         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
674 };
675
/*
 * Stoney "golden" register settings: {register, mask, value} triples
 * consumed by amdgpu_device_program_register_sequence() from
 * gfx_v8_0_init_golden_registers().
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
689
/*
 * Stoney common golden settings ({register, mask, value} triples), applied
 * after the a11 settings in gfx_v8_0_init_golden_registers().
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
701
/*
 * Stoney MGCG/CGCG (clockgating) init sequence ({register, mask, value}
 * triples), programmed first for CHIP_STONEY in
 * gfx_v8_0_init_golden_registers().
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
710
711
/*
 * Human-readable names for the SQ EDC info "source" encodings, indexed by
 * the raw source value (one string per SQ_EDC_INFO_SOURCE_* code).
 */
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
721
722 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
723 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
724 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
725 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
726 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
727 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
728 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
729 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
730
/*
 * gfx_v8_0_init_golden_registers - apply per-ASIC "golden" register values
 * @adev: amdgpu device
 *
 * Programs the validated register sequences for the detected VI-family ASIC
 * via amdgpu_device_program_register_sequence().  Each table is a list of
 * {register, mask, value} triples; for every ASIC the tables are applied in
 * the order listed (clockgating init, then settings, then common).
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/*
		 * NOTE(review): board-specific quirk.  On a handful of
		 * partner Polaris10 boards (matched by PCI revision and
		 * subsystem IDs below) two i2c transactions are issued via
		 * atombios — presumably a fan/board-controller fixup; the
		 * exact register meaning is not visible here, confirm
		 * against the board vendors' errata before touching.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
827
828 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
829 {
830         adev->gfx.scratch.num_reg = 8;
831         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
832         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
833 }
834
835 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
836 {
837         struct amdgpu_device *adev = ring->adev;
838         uint32_t scratch;
839         uint32_t tmp = 0;
840         unsigned i;
841         int r;
842
843         r = amdgpu_gfx_scratch_get(adev, &scratch);
844         if (r)
845                 return r;
846
847         WREG32(scratch, 0xCAFEDEAD);
848         r = amdgpu_ring_alloc(ring, 3);
849         if (r)
850                 goto error_free_scratch;
851
852         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
853         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
854         amdgpu_ring_write(ring, 0xDEADBEEF);
855         amdgpu_ring_commit(ring);
856
857         for (i = 0; i < adev->usec_timeout; i++) {
858                 tmp = RREG32(scratch);
859                 if (tmp == 0xDEADBEEF)
860                         break;
861                 udelay(1);
862         }
863
864         if (i >= adev->usec_timeout)
865                 r = -ETIMEDOUT;
866
867 error_free_scratch:
868         amdgpu_gfx_scratch_free(adev, scratch);
869         return r;
870 }
871
872 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
873 {
874         struct amdgpu_device *adev = ring->adev;
875         struct amdgpu_ib ib;
876         struct dma_fence *f = NULL;
877
878         unsigned int index;
879         uint64_t gpu_addr;
880         uint32_t tmp;
881         long r;
882
883         r = amdgpu_device_wb_get(adev, &index);
884         if (r)
885                 return r;
886
887         gpu_addr = adev->wb.gpu_addr + (index * 4);
888         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
889         memset(&ib, 0, sizeof(ib));
890         r = amdgpu_ib_get(adev, NULL, 16, &ib);
891         if (r)
892                 goto err1;
893
894         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
895         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
896         ib.ptr[2] = lower_32_bits(gpu_addr);
897         ib.ptr[3] = upper_32_bits(gpu_addr);
898         ib.ptr[4] = 0xDEADBEEF;
899         ib.length_dw = 5;
900
901         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
902         if (r)
903                 goto err2;
904
905         r = dma_fence_wait_timeout(f, false, timeout);
906         if (r == 0) {
907                 r = -ETIMEDOUT;
908                 goto err2;
909         } else if (r < 0) {
910                 goto err2;
911         }
912
913         tmp = adev->wb.wb[index];
914         if (tmp == 0xDEADBEEF)
915                 r = 0;
916         else
917                 r = -EINVAL;
918
919 err2:
920         amdgpu_ib_free(adev, &ib, NULL);
921         dma_fence_put(f);
922 err1:
923         amdgpu_device_wb_free(adev, index);
924         return r;
925 }
926
927
928 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
929 {
930         release_firmware(adev->gfx.pfp_fw);
931         adev->gfx.pfp_fw = NULL;
932         release_firmware(adev->gfx.me_fw);
933         adev->gfx.me_fw = NULL;
934         release_firmware(adev->gfx.ce_fw);
935         adev->gfx.ce_fw = NULL;
936         release_firmware(adev->gfx.rlc_fw);
937         adev->gfx.rlc_fw = NULL;
938         release_firmware(adev->gfx.mec_fw);
939         adev->gfx.mec_fw = NULL;
940         if ((adev->asic_type != CHIP_STONEY) &&
941             (adev->asic_type != CHIP_TOPAZ))
942                 release_firmware(adev->gfx.mec2_fw);
943         adev->gfx.mec2_fw = NULL;
944
945         kfree(adev->gfx.rlc.register_list_format);
946 }
947
948 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
949 {
950         const char *chip_name;
951         char fw_name[30];
952         int err;
953         struct amdgpu_firmware_info *info = NULL;
954         const struct common_firmware_header *header = NULL;
955         const struct gfx_firmware_header_v1_0 *cp_hdr;
956         const struct rlc_firmware_header_v2_0 *rlc_hdr;
957         unsigned int *tmp = NULL, i;
958
959         DRM_DEBUG("\n");
960
961         switch (adev->asic_type) {
962         case CHIP_TOPAZ:
963                 chip_name = "topaz";
964                 break;
965         case CHIP_TONGA:
966                 chip_name = "tonga";
967                 break;
968         case CHIP_CARRIZO:
969                 chip_name = "carrizo";
970                 break;
971         case CHIP_FIJI:
972                 chip_name = "fiji";
973                 break;
974         case CHIP_STONEY:
975                 chip_name = "stoney";
976                 break;
977         case CHIP_POLARIS10:
978                 chip_name = "polaris10";
979                 break;
980         case CHIP_POLARIS11:
981                 chip_name = "polaris11";
982                 break;
983         case CHIP_POLARIS12:
984                 chip_name = "polaris12";
985                 break;
986         case CHIP_VEGAM:
987                 chip_name = "vegam";
988                 break;
989         default:
990                 BUG();
991         }
992
993         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
994                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
995                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
996                 if (err == -ENOENT) {
997                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
998                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
999                 }
1000         } else {
1001                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1002                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1003         }
1004         if (err)
1005                 goto out;
1006         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1007         if (err)
1008                 goto out;
1009         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1010         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1011         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1012
1013         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1014                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1015                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1016                 if (err == -ENOENT) {
1017                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1018                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1019                 }
1020         } else {
1021                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1022                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1023         }
1024         if (err)
1025                 goto out;
1026         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1027         if (err)
1028                 goto out;
1029         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1030         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1031
1032         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1033
1034         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1035                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1036                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1037                 if (err == -ENOENT) {
1038                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1039                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1040                 }
1041         } else {
1042                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1043                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1044         }
1045         if (err)
1046                 goto out;
1047         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1048         if (err)
1049                 goto out;
1050         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1051         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1052         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1053
1054         /*
1055          * Support for MCBP/Virtualization in combination with chained IBs is
1056          * formal released on feature version #46
1057          */
1058         if (adev->gfx.ce_feature_version >= 46 &&
1059             adev->gfx.pfp_feature_version >= 46) {
1060                 adev->virt.chained_ib_support = true;
1061                 DRM_INFO("Chained IB support enabled!\n");
1062         } else
1063                 adev->virt.chained_ib_support = false;
1064
1065         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1066         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1067         if (err)
1068                 goto out;
1069         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1070         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1071         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1072         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1073
1074         adev->gfx.rlc.save_and_restore_offset =
1075                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1076         adev->gfx.rlc.clear_state_descriptor_offset =
1077                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1078         adev->gfx.rlc.avail_scratch_ram_locations =
1079                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1080         adev->gfx.rlc.reg_restore_list_size =
1081                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1082         adev->gfx.rlc.reg_list_format_start =
1083                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1084         adev->gfx.rlc.reg_list_format_separate_start =
1085                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1086         adev->gfx.rlc.starting_offsets_start =
1087                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1088         adev->gfx.rlc.reg_list_format_size_bytes =
1089                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1090         adev->gfx.rlc.reg_list_size_bytes =
1091                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1092
1093         adev->gfx.rlc.register_list_format =
1094                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1095                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1096
1097         if (!adev->gfx.rlc.register_list_format) {
1098                 err = -ENOMEM;
1099                 goto out;
1100         }
1101
1102         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1103                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1104         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1105                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1106
1107         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1108
1109         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1110                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1111         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1112                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1113
1114         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1115                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1116                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1117                 if (err == -ENOENT) {
1118                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1119                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1120                 }
1121         } else {
1122                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1123                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1124         }
1125         if (err)
1126                 goto out;
1127         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1128         if (err)
1129                 goto out;
1130         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1131         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1132         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1133
1134         if ((adev->asic_type != CHIP_STONEY) &&
1135             (adev->asic_type != CHIP_TOPAZ)) {
1136                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1137                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1138                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1139                         if (err == -ENOENT) {
1140                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1141                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1142                         }
1143                 } else {
1144                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1145                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1146                 }
1147                 if (!err) {
1148                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1149                         if (err)
1150                                 goto out;
1151                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1152                                 adev->gfx.mec2_fw->data;
1153                         adev->gfx.mec2_fw_version =
1154                                 le32_to_cpu(cp_hdr->header.ucode_version);
1155                         adev->gfx.mec2_feature_version =
1156                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1157                 } else {
1158                         err = 0;
1159                         adev->gfx.mec2_fw = NULL;
1160                 }
1161         }
1162
1163         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1164         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1165         info->fw = adev->gfx.pfp_fw;
1166         header = (const struct common_firmware_header *)info->fw->data;
1167         adev->firmware.fw_size +=
1168                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1169
1170         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1171         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1172         info->fw = adev->gfx.me_fw;
1173         header = (const struct common_firmware_header *)info->fw->data;
1174         adev->firmware.fw_size +=
1175                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1176
1177         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1178         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1179         info->fw = adev->gfx.ce_fw;
1180         header = (const struct common_firmware_header *)info->fw->data;
1181         adev->firmware.fw_size +=
1182                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1183
1184         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1185         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1186         info->fw = adev->gfx.rlc_fw;
1187         header = (const struct common_firmware_header *)info->fw->data;
1188         adev->firmware.fw_size +=
1189                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1190
1191         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1192         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1193         info->fw = adev->gfx.mec_fw;
1194         header = (const struct common_firmware_header *)info->fw->data;
1195         adev->firmware.fw_size +=
1196                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1197
1198         /* we need account JT in */
1199         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1200         adev->firmware.fw_size +=
1201                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1202
1203         if (amdgpu_sriov_vf(adev)) {
1204                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1205                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1206                 info->fw = adev->gfx.mec_fw;
1207                 adev->firmware.fw_size +=
1208                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1209         }
1210
1211         if (adev->gfx.mec2_fw) {
1212                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1213                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1214                 info->fw = adev->gfx.mec2_fw;
1215                 header = (const struct common_firmware_header *)info->fw->data;
1216                 adev->firmware.fw_size +=
1217                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1218         }
1219
1220 out:
1221         if (err) {
1222                 dev_err(adev->dev,
1223                         "gfx8: Failed to load firmware \"%s\"\n",
1224                         fw_name);
1225                 release_firmware(adev->gfx.pfp_fw);
1226                 adev->gfx.pfp_fw = NULL;
1227                 release_firmware(adev->gfx.me_fw);
1228                 adev->gfx.me_fw = NULL;
1229                 release_firmware(adev->gfx.ce_fw);
1230                 adev->gfx.ce_fw = NULL;
1231                 release_firmware(adev->gfx.rlc_fw);
1232                 adev->gfx.rlc_fw = NULL;
1233                 release_firmware(adev->gfx.mec_fw);
1234                 adev->gfx.mec_fw = NULL;
1235                 release_firmware(adev->gfx.mec2_fw);
1236                 adev->gfx.mec2_fw = NULL;
1237         }
1238         return err;
1239 }
1240
/*
 * gfx_v8_0_get_csb_buffer - serialize the clear-state buffer (CSB)
 * @adev: amdgpu device
 * @buffer: destination dword array, written as little-endian PM4 packets
 *
 * Emits a PM4 stream: preamble begin, CONTEXT_CONTROL, every SECT_CONTEXT
 * register extent from adev->gfx.rlc.cs_data, the two raster-config
 * registers, preamble end, and a trailing CLEAR_STATE packet.  Packet order
 * is significant.  Returns silently when cs_data or @buffer is NULL; the
 * caller is responsible for sizing @buffer (see gfx_v8_0_get_csb_size()).
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* One SET_CONTEXT_REG packet per extent of each context section. */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* Only context sections are expected here. */
				return;
			}
		}
	}

	/* Raster configuration from the first SE/SH rb_config entry. */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1287
1288 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1289 {
1290         if (adev->asic_type == CHIP_CARRIZO)
1291                 return 5;
1292         else
1293                 return 4;
1294 }
1295
1296 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1297 {
1298         const struct cs_section_def *cs_data;
1299         int r;
1300
1301         adev->gfx.rlc.cs_data = vi_cs_data;
1302
1303         cs_data = adev->gfx.rlc.cs_data;
1304
1305         if (cs_data) {
1306                 /* init clear state block */
1307                 r = amdgpu_gfx_rlc_init_csb(adev);
1308                 if (r)
1309                         return r;
1310         }
1311
1312         if ((adev->asic_type == CHIP_CARRIZO) ||
1313             (adev->asic_type == CHIP_STONEY)) {
1314                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1315                 r = amdgpu_gfx_rlc_init_cpt(adev);
1316                 if (r)
1317                         return r;
1318         }
1319
1320         return 0;
1321 }
1322
/* Free the MEC HPD EOP buffer object allocated in gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1327
1328 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1329 {
1330         int r;
1331         u32 *hpd;
1332         size_t mec_hpd_size;
1333
1334         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1335
1336         /* take ownership of the relevant compute queues */
1337         amdgpu_gfx_compute_queue_acquire(adev);
1338
1339         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1340
1341         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1342                                       AMDGPU_GEM_DOMAIN_VRAM,
1343                                       &adev->gfx.mec.hpd_eop_obj,
1344                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1345                                       (void **)&hpd);
1346         if (r) {
1347                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1348                 return r;
1349         }
1350
1351         memset(hpd, 0, mec_hpd_size);
1352
1353         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1354         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1355
1356         return 0;
1357 }
1358
/*
 * Hand-assembled GCN compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() to write known values into the
 * VGPRs before EDC is enabled.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1395
/*
 * Hand-assembled GCN compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() to write known values into the
 * SGPRs (dispatched twice, once per SGPR half) before EDC is enabled.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1420
/*
 * Register/value pairs (consumed two at a time) emitted as SET_SH_REG
 * packets by gfx_v8_0_do_edc_gpr_workarounds() for the VGPR-init
 * compute dispatch.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1441
/*
 * Register/value pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds() (SE thread mgmt mask 0x0f selects
 * the first group of CUs).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1462
/*
 * Register/value pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds() (SE thread mgmt mask 0xf0 selects
 * the other group of CUs).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1483
/*
 * EDC SEC/DED counter registers; read back (RREG32) at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1512
/*
 * gfx_v8_0_do_edc_gpr_workarounds - initialize GPRs before enabling EDC
 *
 * Carrizo-only workaround: builds a single indirect buffer that runs
 * three compute dispatches (one VGPR-init shader and two SGPR-init
 * dispatches covering both CU groups) so all GPRs hold known values,
 * then enables DED/PROP_FED in GB_EDC_MODE and clears the SEC/DED
 * counters by reading them back.
 *
 * Returns 0 on success or when the workaround does not apply, negative
 * error code on IB allocation/submission/fence failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->sched.ready)
		return 0;

	/* save EDC mode and disable it while the init shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* size the IB: 3 dwords per reg write pair + PGM addr + dispatch + flush */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	/* NOTE(review): same SGPR shader as above, only the CU mask differs */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED halt and FED propagation */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1675
1676 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1677 {
1678         u32 gb_addr_config;
1679         u32 mc_shared_chmap, mc_arb_ramcfg;
1680         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1681         u32 tmp;
1682         int ret;
1683
1684         switch (adev->asic_type) {
1685         case CHIP_TOPAZ:
1686                 adev->gfx.config.max_shader_engines = 1;
1687                 adev->gfx.config.max_tile_pipes = 2;
1688                 adev->gfx.config.max_cu_per_sh = 6;
1689                 adev->gfx.config.max_sh_per_se = 1;
1690                 adev->gfx.config.max_backends_per_se = 2;
1691                 adev->gfx.config.max_texture_channel_caches = 2;
1692                 adev->gfx.config.max_gprs = 256;
1693                 adev->gfx.config.max_gs_threads = 32;
1694                 adev->gfx.config.max_hw_contexts = 8;
1695
1696                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1697                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1698                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1699                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1700                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1701                 break;
1702         case CHIP_FIJI:
1703                 adev->gfx.config.max_shader_engines = 4;
1704                 adev->gfx.config.max_tile_pipes = 16;
1705                 adev->gfx.config.max_cu_per_sh = 16;
1706                 adev->gfx.config.max_sh_per_se = 1;
1707                 adev->gfx.config.max_backends_per_se = 4;
1708                 adev->gfx.config.max_texture_channel_caches = 16;
1709                 adev->gfx.config.max_gprs = 256;
1710                 adev->gfx.config.max_gs_threads = 32;
1711                 adev->gfx.config.max_hw_contexts = 8;
1712
1713                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1714                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1715                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1716                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1717                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1718                 break;
1719         case CHIP_POLARIS11:
1720         case CHIP_POLARIS12:
1721                 ret = amdgpu_atombios_get_gfx_info(adev);
1722                 if (ret)
1723                         return ret;
1724                 adev->gfx.config.max_gprs = 256;
1725                 adev->gfx.config.max_gs_threads = 32;
1726                 adev->gfx.config.max_hw_contexts = 8;
1727
1728                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1729                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1730                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1731                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1732                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1733                 break;
1734         case CHIP_POLARIS10:
1735         case CHIP_VEGAM:
1736                 ret = amdgpu_atombios_get_gfx_info(adev);
1737                 if (ret)
1738                         return ret;
1739                 adev->gfx.config.max_gprs = 256;
1740                 adev->gfx.config.max_gs_threads = 32;
1741                 adev->gfx.config.max_hw_contexts = 8;
1742
1743                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1744                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1745                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1746                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1747                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1748                 break;
1749         case CHIP_TONGA:
1750                 adev->gfx.config.max_shader_engines = 4;
1751                 adev->gfx.config.max_tile_pipes = 8;
1752                 adev->gfx.config.max_cu_per_sh = 8;
1753                 adev->gfx.config.max_sh_per_se = 1;
1754                 adev->gfx.config.max_backends_per_se = 2;
1755                 adev->gfx.config.max_texture_channel_caches = 8;
1756                 adev->gfx.config.max_gprs = 256;
1757                 adev->gfx.config.max_gs_threads = 32;
1758                 adev->gfx.config.max_hw_contexts = 8;
1759
1760                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1761                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1762                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1763                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1764                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1765                 break;
1766         case CHIP_CARRIZO:
1767                 adev->gfx.config.max_shader_engines = 1;
1768                 adev->gfx.config.max_tile_pipes = 2;
1769                 adev->gfx.config.max_sh_per_se = 1;
1770                 adev->gfx.config.max_backends_per_se = 2;
1771                 adev->gfx.config.max_cu_per_sh = 8;
1772                 adev->gfx.config.max_texture_channel_caches = 2;
1773                 adev->gfx.config.max_gprs = 256;
1774                 adev->gfx.config.max_gs_threads = 32;
1775                 adev->gfx.config.max_hw_contexts = 8;
1776
1777                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1778                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1779                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1780                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1781                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1782                 break;
1783         case CHIP_STONEY:
1784                 adev->gfx.config.max_shader_engines = 1;
1785                 adev->gfx.config.max_tile_pipes = 2;
1786                 adev->gfx.config.max_sh_per_se = 1;
1787                 adev->gfx.config.max_backends_per_se = 1;
1788                 adev->gfx.config.max_cu_per_sh = 3;
1789                 adev->gfx.config.max_texture_channel_caches = 2;
1790                 adev->gfx.config.max_gprs = 256;
1791                 adev->gfx.config.max_gs_threads = 16;
1792                 adev->gfx.config.max_hw_contexts = 8;
1793
1794                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1795                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1796                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1797                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1798                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1799                 break;
1800         default:
1801                 adev->gfx.config.max_shader_engines = 2;
1802                 adev->gfx.config.max_tile_pipes = 4;
1803                 adev->gfx.config.max_cu_per_sh = 2;
1804                 adev->gfx.config.max_sh_per_se = 1;
1805                 adev->gfx.config.max_backends_per_se = 2;
1806                 adev->gfx.config.max_texture_channel_caches = 4;
1807                 adev->gfx.config.max_gprs = 256;
1808                 adev->gfx.config.max_gs_threads = 32;
1809                 adev->gfx.config.max_hw_contexts = 8;
1810
1811                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1816                 break;
1817         }
1818
1819         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1820         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1821         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1822
1823         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1824         adev->gfx.config.mem_max_burst_length_bytes = 256;
1825         if (adev->flags & AMD_IS_APU) {
1826                 /* Get memory bank mapping mode. */
1827                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1828                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1829                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1830
1831                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1832                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1833                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1834
1835                 /* Validate settings in case only one DIMM installed. */
1836                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1837                         dimm00_addr_map = 0;
1838                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1839                         dimm01_addr_map = 0;
1840                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1841                         dimm10_addr_map = 0;
1842                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1843                         dimm11_addr_map = 0;
1844
1845                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1846                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1847                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1848                         adev->gfx.config.mem_row_size_in_kb = 2;
1849                 else
1850                         adev->gfx.config.mem_row_size_in_kb = 1;
1851         } else {
1852                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1853                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1854                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1855                         adev->gfx.config.mem_row_size_in_kb = 4;
1856         }
1857
1858         adev->gfx.config.shader_engine_tile_size = 32;
1859         adev->gfx.config.num_gpus = 1;
1860         adev->gfx.config.multi_gpu_tile_size = 64;
1861
1862         /* fix up row size */
1863         switch (adev->gfx.config.mem_row_size_in_kb) {
1864         case 1:
1865         default:
1866                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1867                 break;
1868         case 2:
1869                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1870                 break;
1871         case 4:
1872                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1873                 break;
1874         }
1875         adev->gfx.config.gb_addr_config = gb_addr_config;
1876
1877         return 0;
1878 }
1879
1880 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1881                                         int mec, int pipe, int queue)
1882 {
1883         int r;
1884         unsigned irq_type;
1885         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1886
1887         ring = &adev->gfx.compute_ring[ring_id];
1888
1889         /* mec0 is me1 */
1890         ring->me = mec + 1;
1891         ring->pipe = pipe;
1892         ring->queue = queue;
1893
1894         ring->ring_obj = NULL;
1895         ring->use_doorbell = true;
1896         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1897         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1898                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1899         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1900
1901         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1902                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1903                 + ring->pipe;
1904
1905         /* type-2 packets are deprecated on MEC, use type-3 instead */
1906         r = amdgpu_ring_init(adev, ring, 1024,
1907                         &adev->gfx.eop_irq, irq_type);
1908         if (r)
1909                 return r;
1910
1911
1912         return 0;
1913 }
1914
1915 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1916
/*
 * gfx_v8_0_sw_init - software init for the GFX8 IP block
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Registers the gfx interrupt sources, loads microcode, allocates RLC
 * and MEC objects, and creates the gfx ring(s), compute rings, KIQ ring
 * and compute MQDs.  Returns 0 on success or a negative error code;
 * partial allocations are cleaned up by sw_fini.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of MECs varies per ASIC */
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* Add CP EDC/ECC irq  */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
		return r;
	}

	/* SQ interrupt details are processed from a work item */
	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = adev->gfx.rlc.funcs->init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2060
/*
 * gfx_v8_0_sw_fini - tear down GFX software state (inverse of sw_init)
 *
 * @handle: amdgpu_device pointer cast to void * (standard IP-block callback
 *          signature)
 *
 * Frees, in dependency order: the GDS buffer objects, all gfx and compute
 * rings, the per-queue MQD backing store, the KIQ ring and KIQ state, the
 * MEC buffer objects, the RLC state (including the clear-state buffer), the
 * CP table on ASICs that allocate one, and finally the cached firmware
 * images.  Always returns 0.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* Release the GDS OA/GWS/GDS buffer objects. */
	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	/* Tear down every gfx and compute ring created in sw_init. */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	/* MQD backing store and KIQ ring go before the KIQ object itself. */
	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	amdgpu_gfx_rlc_fini(adev);
	/* The RLC clear-state BO is freed separately from rlc_fini here. */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/*
	 * Only Carrizo and Stoney carry a CP table BO (presumably allocated
	 * for these ASICs during rlc init — confirm against the init path).
	 */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	/* Drop the firmware images loaded by gfx_v8_0_init_microcode(). */
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2094
2095 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2096 {
2097         uint32_t *modearray, *mod2array;
2098         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2099         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2100         u32 reg_offset;
2101
2102         modearray = adev->gfx.config.tile_mode_array;
2103         mod2array = adev->gfx.config.macrotile_mode_array;
2104
2105         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2106                 modearray[reg_offset] = 0;
2107
2108         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2109                 mod2array[reg_offset] = 0;
2110
2111         switch (adev->asic_type) {
2112         case CHIP_TOPAZ:
2113                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2114                                 PIPE_CONFIG(ADDR_SURF_P2) |
2115                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2116                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2117                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2118                                 PIPE_CONFIG(ADDR_SURF_P2) |
2119                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2120                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2121                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2122                                 PIPE_CONFIG(ADDR_SURF_P2) |
2123                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2124                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2125                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2126                                 PIPE_CONFIG(ADDR_SURF_P2) |
2127                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2128                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2129                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2130                                 PIPE_CONFIG(ADDR_SURF_P2) |
2131                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2132                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2133                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2134                                 PIPE_CONFIG(ADDR_SURF_P2) |
2135                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2136                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2137                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2138                                 PIPE_CONFIG(ADDR_SURF_P2) |
2139                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2140                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2141                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2142                                 PIPE_CONFIG(ADDR_SURF_P2));
2143                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2144                                 PIPE_CONFIG(ADDR_SURF_P2) |
2145                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2146                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2147                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2148                                  PIPE_CONFIG(ADDR_SURF_P2) |
2149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2151                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2152                                  PIPE_CONFIG(ADDR_SURF_P2) |
2153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2155                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2156                                  PIPE_CONFIG(ADDR_SURF_P2) |
2157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2159                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160                                  PIPE_CONFIG(ADDR_SURF_P2) |
2161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2163                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2164                                  PIPE_CONFIG(ADDR_SURF_P2) |
2165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2167                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2168                                  PIPE_CONFIG(ADDR_SURF_P2) |
2169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2171                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2172                                  PIPE_CONFIG(ADDR_SURF_P2) |
2173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2175                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2176                                  PIPE_CONFIG(ADDR_SURF_P2) |
2177                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2178                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2179                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2180                                  PIPE_CONFIG(ADDR_SURF_P2) |
2181                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2182                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2183                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2184                                  PIPE_CONFIG(ADDR_SURF_P2) |
2185                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2186                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2187                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2188                                  PIPE_CONFIG(ADDR_SURF_P2) |
2189                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2190                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2191                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2192                                  PIPE_CONFIG(ADDR_SURF_P2) |
2193                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2194                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2195                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2196                                  PIPE_CONFIG(ADDR_SURF_P2) |
2197                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2198                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2199                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2200                                  PIPE_CONFIG(ADDR_SURF_P2) |
2201                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2202                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2203                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2204                                  PIPE_CONFIG(ADDR_SURF_P2) |
2205                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2206                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2207                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2208                                  PIPE_CONFIG(ADDR_SURF_P2) |
2209                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2210                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2211                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2212                                  PIPE_CONFIG(ADDR_SURF_P2) |
2213                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2214                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2215
2216                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2217                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2218                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2219                                 NUM_BANKS(ADDR_SURF_8_BANK));
2220                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2221                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2222                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2223                                 NUM_BANKS(ADDR_SURF_8_BANK));
2224                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2225                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2226                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2227                                 NUM_BANKS(ADDR_SURF_8_BANK));
2228                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2229                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2230                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2231                                 NUM_BANKS(ADDR_SURF_8_BANK));
2232                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2233                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2234                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2235                                 NUM_BANKS(ADDR_SURF_8_BANK));
2236                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2237                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2238                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2239                                 NUM_BANKS(ADDR_SURF_8_BANK));
2240                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2241                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2242                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2243                                 NUM_BANKS(ADDR_SURF_8_BANK));
2244                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2245                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2246                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2247                                 NUM_BANKS(ADDR_SURF_16_BANK));
2248                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2249                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2250                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2251                                 NUM_BANKS(ADDR_SURF_16_BANK));
2252                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2253                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2254                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2255                                  NUM_BANKS(ADDR_SURF_16_BANK));
2256                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2257                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2258                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2259                                  NUM_BANKS(ADDR_SURF_16_BANK));
2260                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2261                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2262                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2263                                  NUM_BANKS(ADDR_SURF_16_BANK));
2264                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2266                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2267                                  NUM_BANKS(ADDR_SURF_16_BANK));
2268                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2269                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2270                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2271                                  NUM_BANKS(ADDR_SURF_8_BANK));
2272
2273                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2274                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2275                             reg_offset != 23)
2276                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2277
2278                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2279                         if (reg_offset != 7)
2280                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2281
2282                 break;
2283         case CHIP_FIJI:
2284         case CHIP_VEGAM:
2285                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2286                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2287                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2288                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2289                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2291                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2292                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2293                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2294                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2295                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2296                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2297                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2298                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2299                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2300                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2301                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2302                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2303                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2304                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2305                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2306                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2307                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2308                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2309                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2310                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2312                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2313                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2314                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2315                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2316                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2317                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2318                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2319                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2320                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2322                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2323                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2327                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2328                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2331                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2332                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2335                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2336                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2339                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2343                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2344                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2347                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2348                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2351                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2352                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2355                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2356                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2359                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2360                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2363                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2364                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2367                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2368                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2370                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2371                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2372                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2374                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2375                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2376                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2377                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2378                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2379                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2380                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2382                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2383                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2384                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2386                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2387                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2388                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2390                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2391                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2392                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2394                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2395                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2396                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2398                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2399                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2400                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2402                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2403                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2404                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2405                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2406                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2407
2408                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2410                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2411                                 NUM_BANKS(ADDR_SURF_8_BANK));
2412                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2414                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2415                                 NUM_BANKS(ADDR_SURF_8_BANK));
2416                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2418                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2419                                 NUM_BANKS(ADDR_SURF_8_BANK));
2420                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2421                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2422                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2423                                 NUM_BANKS(ADDR_SURF_8_BANK));
2424                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2426                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2427                                 NUM_BANKS(ADDR_SURF_8_BANK));
2428                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2430                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2431                                 NUM_BANKS(ADDR_SURF_8_BANK));
2432                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2433                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2434                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2435                                 NUM_BANKS(ADDR_SURF_8_BANK));
2436                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2438                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2439                                 NUM_BANKS(ADDR_SURF_8_BANK));
2440                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2442                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2443                                 NUM_BANKS(ADDR_SURF_8_BANK));
2444                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2446                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2447                                  NUM_BANKS(ADDR_SURF_8_BANK));
2448                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2450                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2451                                  NUM_BANKS(ADDR_SURF_8_BANK));
2452                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2454                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2455                                  NUM_BANKS(ADDR_SURF_8_BANK));
2456                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2459                                  NUM_BANKS(ADDR_SURF_8_BANK));
2460                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463                                  NUM_BANKS(ADDR_SURF_4_BANK));
2464
2465                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2466                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2467
2468                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2469                         if (reg_offset != 7)
2470                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2471
2472                 break;
	case CHIP_TONGA:
		/*
		 * Tonga tile-mode table (GB_TILE_MODE0..30).  Tonga has an
		 * 8-pipe layout, so nearly every entry uses the
		 * ADDR_SURF_P8_32x32_16x16 pipe config; the PRT fallback
		 * entries (7, 12, 17, 23, 30) use P4_16x16 instead.
		 * Layout of the table:
		 *   0-7:   depth micro-tiling, increasing TILE_SPLIT 64B..2KB
		 *   8:     linear aligned
		 *   9-12:  display micro-tiling
		 *   13-17: thin 1D/2D/3D/PRT variants
		 *   18-26: thick / xthick variants
		 *   27-30: rotated micro-tiling
		 * The raw field values are AMD-provided surface-addressing
		 * parameters; do not derive or "simplify" them.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Tonga macro-tile table (GB_MACROTILE_MODE0..14): bank
		 * width/height, macro-tile aspect and bank count per mode.
		 * Index 7 is deliberately left unset -- the write loop below
		 * skips reg_offset 7 as well, so it is never programmed.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile-mode registers (GB_TILE_MODE0..n). */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/*
		 * Program the macro-tile registers, skipping index 7 (never
		 * initialized above; presumably reserved on this ASIC --
		 * the same skip appears in every chip case).
		 */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/*
		 * Polaris11/12 tile-mode table (GB_TILE_MODE0..30).  These
		 * are 4-pipe parts, so every entry uses the ADDR_SURF_P4_16x16
		 * pipe config (unlike the 8-pipe Tonga/Polaris10 tables).
		 * Layout mirrors the other VI chips:
		 *   0-7:   depth micro-tiling, increasing TILE_SPLIT 64B..2KB
		 *   8:     linear aligned
		 *   9-12:  display micro-tiling
		 *   13-17: thin 1D/2D/3D/PRT variants
		 *   18-26: thick / xthick variants
		 *   27-30: rotated micro-tiling
		 * The raw field values are AMD-provided surface-addressing
		 * parameters; do not derive or "simplify" them.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Polaris11/12 macro-tile table (GB_MACROTILE_MODE0..14).
		 * Index 7 is deliberately left unset -- the write loop below
		 * skips reg_offset 7 as well, so it is never programmed.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile-mode registers (GB_TILE_MODE0..n). */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/*
		 * Program the macro-tile registers, skipping index 7 (never
		 * initialized above; presumably reserved on this ASIC --
		 * the same skip appears in every chip case).
		 */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2865         case CHIP_POLARIS10:
2866                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2867                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2868                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2869                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2870                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2871                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2872                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2873                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2874                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2875                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2876                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2877                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2878                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2879                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2880                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2881                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2882                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2883                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2884                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2885                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2886                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2887                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2888                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2889                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2890                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2891                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2892                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2893                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2894                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2895                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2896                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2897                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2898                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2899                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2900                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2901                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2903                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2904                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2907                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2908                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2909                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2911                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2912                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2913                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2915                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2916                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2919                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2920                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2921                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2923                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2924                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2925                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2926                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2927                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2928                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2929                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2930                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2931                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2932                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2933                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2934                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2935                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2936                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2937                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2938                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2939                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2940                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2941                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2942                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2943                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2944                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2945                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2946                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2947                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2948                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2949                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2950                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2951                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2952                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2953                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2954                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2955                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2956                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2957                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2958                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2959                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2960                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2961                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2962                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2963                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2964                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2965                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2966                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2967                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2968                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2969                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2970                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2971                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2972                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2973                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2974                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2975                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2976                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2977                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2978                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2979                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2980                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2981                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2982                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2983                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2984                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2985                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2986                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2987                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2988
2989                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2990                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2991                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2992                                 NUM_BANKS(ADDR_SURF_16_BANK));
2993
2994                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2995                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2996                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2997                                 NUM_BANKS(ADDR_SURF_16_BANK));
2998
2999                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3001                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002                                 NUM_BANKS(ADDR_SURF_16_BANK));
3003
3004                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3006                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007                                 NUM_BANKS(ADDR_SURF_16_BANK));
3008
3009                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3010                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3011                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3012                                 NUM_BANKS(ADDR_SURF_16_BANK));
3013
3014                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3015                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3016                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3017                                 NUM_BANKS(ADDR_SURF_16_BANK));
3018
3019                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3021                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3022                                 NUM_BANKS(ADDR_SURF_16_BANK));
3023
3024                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3026                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027                                 NUM_BANKS(ADDR_SURF_16_BANK));
3028
3029                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3031                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032                                 NUM_BANKS(ADDR_SURF_16_BANK));
3033
3034                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3036                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3037                                 NUM_BANKS(ADDR_SURF_16_BANK));
3038
3039                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3041                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042                                 NUM_BANKS(ADDR_SURF_16_BANK));
3043
3044                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3046                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3047                                 NUM_BANKS(ADDR_SURF_8_BANK));
3048
3049                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052                                 NUM_BANKS(ADDR_SURF_4_BANK));
3053
3054                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3056                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3057                                 NUM_BANKS(ADDR_SURF_4_BANK));
3058
3059                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3060                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3061
3062                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3063                         if (reg_offset != 7)
3064                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3065
3066                 break;
3067         case CHIP_STONEY:
3068                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3069                                 PIPE_CONFIG(ADDR_SURF_P2) |
3070                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3071                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3072                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3073                                 PIPE_CONFIG(ADDR_SURF_P2) |
3074                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3075                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3076                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3077                                 PIPE_CONFIG(ADDR_SURF_P2) |
3078                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3079                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3080                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3081                                 PIPE_CONFIG(ADDR_SURF_P2) |
3082                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3083                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3084                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3085                                 PIPE_CONFIG(ADDR_SURF_P2) |
3086                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3087                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3088                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3089                                 PIPE_CONFIG(ADDR_SURF_P2) |
3090                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3091                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3092                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3093                                 PIPE_CONFIG(ADDR_SURF_P2) |
3094                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3095                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3096                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3097                                 PIPE_CONFIG(ADDR_SURF_P2));
3098                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3099                                 PIPE_CONFIG(ADDR_SURF_P2) |
3100                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3101                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3102                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3103                                  PIPE_CONFIG(ADDR_SURF_P2) |
3104                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3105                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3106                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3107                                  PIPE_CONFIG(ADDR_SURF_P2) |
3108                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3109                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3110                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3111                                  PIPE_CONFIG(ADDR_SURF_P2) |
3112                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3113                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3114                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115                                  PIPE_CONFIG(ADDR_SURF_P2) |
3116                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3117                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3118                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3119                                  PIPE_CONFIG(ADDR_SURF_P2) |
3120                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3121                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3122                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3123                                  PIPE_CONFIG(ADDR_SURF_P2) |
3124                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3125                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3126                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3127                                  PIPE_CONFIG(ADDR_SURF_P2) |
3128                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3129                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3130                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3131                                  PIPE_CONFIG(ADDR_SURF_P2) |
3132                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3133                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3134                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3135                                  PIPE_CONFIG(ADDR_SURF_P2) |
3136                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3137                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3138                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3139                                  PIPE_CONFIG(ADDR_SURF_P2) |
3140                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3141                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3142                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3143                                  PIPE_CONFIG(ADDR_SURF_P2) |
3144                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3145                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3146                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3147                                  PIPE_CONFIG(ADDR_SURF_P2) |
3148                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3149                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3150                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3151                                  PIPE_CONFIG(ADDR_SURF_P2) |
3152                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3153                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3154                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3155                                  PIPE_CONFIG(ADDR_SURF_P2) |
3156                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3157                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3158                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3159                                  PIPE_CONFIG(ADDR_SURF_P2) |
3160                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3161                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3162                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3163                                  PIPE_CONFIG(ADDR_SURF_P2) |
3164                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3165                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3166                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3167                                  PIPE_CONFIG(ADDR_SURF_P2) |
3168                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3169                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3170
3171                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3172                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3173                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3174                                 NUM_BANKS(ADDR_SURF_8_BANK));
3175                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3176                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3177                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3178                                 NUM_BANKS(ADDR_SURF_8_BANK));
3179                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3180                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3181                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3182                                 NUM_BANKS(ADDR_SURF_8_BANK));
3183                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3184                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3185                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3186                                 NUM_BANKS(ADDR_SURF_8_BANK));
3187                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3188                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3189                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3190                                 NUM_BANKS(ADDR_SURF_8_BANK));
3191                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3192                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3193                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3194                                 NUM_BANKS(ADDR_SURF_8_BANK));
3195                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3196                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3197                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3198                                 NUM_BANKS(ADDR_SURF_8_BANK));
3199                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3200                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3201                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3202                                 NUM_BANKS(ADDR_SURF_16_BANK));
3203                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3204                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3205                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3206                                 NUM_BANKS(ADDR_SURF_16_BANK));
3207                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3208                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3209                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3210                                  NUM_BANKS(ADDR_SURF_16_BANK));
3211                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3212                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3213                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3214                                  NUM_BANKS(ADDR_SURF_16_BANK));
3215                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3216                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3217                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3218                                  NUM_BANKS(ADDR_SURF_16_BANK));
3219                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3220                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3221                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3222                                  NUM_BANKS(ADDR_SURF_16_BANK));
3223                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3224                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3225                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3226                                  NUM_BANKS(ADDR_SURF_8_BANK));
3227
3228                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3229                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3230                             reg_offset != 23)
3231                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3232
3233                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3234                         if (reg_offset != 7)
3235                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3236
3237                 break;
3238         default:
3239                 dev_warn(adev->dev,
3240                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3241                          adev->asic_type);
3242                 /* fall through */
3243
3244         case CHIP_CARRIZO:
3245                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3246                                 PIPE_CONFIG(ADDR_SURF_P2) |
3247                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3248                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3249                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3250                                 PIPE_CONFIG(ADDR_SURF_P2) |
3251                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3252                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3253                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3254                                 PIPE_CONFIG(ADDR_SURF_P2) |
3255                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3256                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3257                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3258                                 PIPE_CONFIG(ADDR_SURF_P2) |
3259                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3260                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3261                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3262                                 PIPE_CONFIG(ADDR_SURF_P2) |
3263                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3264                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3265                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3266                                 PIPE_CONFIG(ADDR_SURF_P2) |
3267                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3268                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3269                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3270                                 PIPE_CONFIG(ADDR_SURF_P2) |
3271                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3272                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3273                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3274                                 PIPE_CONFIG(ADDR_SURF_P2));
3275                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3276                                 PIPE_CONFIG(ADDR_SURF_P2) |
3277                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3278                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3279                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3280                                  PIPE_CONFIG(ADDR_SURF_P2) |
3281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3283                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3284                                  PIPE_CONFIG(ADDR_SURF_P2) |
3285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3287                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3288                                  PIPE_CONFIG(ADDR_SURF_P2) |
3289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3291                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292                                  PIPE_CONFIG(ADDR_SURF_P2) |
3293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3295                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3296                                  PIPE_CONFIG(ADDR_SURF_P2) |
3297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3299                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3300                                  PIPE_CONFIG(ADDR_SURF_P2) |
3301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3303                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3304                                  PIPE_CONFIG(ADDR_SURF_P2) |
3305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3307                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3308                                  PIPE_CONFIG(ADDR_SURF_P2) |
3309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3311                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3312                                  PIPE_CONFIG(ADDR_SURF_P2) |
3313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3315                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3316                                  PIPE_CONFIG(ADDR_SURF_P2) |
3317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3319                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3320                                  PIPE_CONFIG(ADDR_SURF_P2) |
3321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3323                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3324                                  PIPE_CONFIG(ADDR_SURF_P2) |
3325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3327                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3328                                  PIPE_CONFIG(ADDR_SURF_P2) |
3329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3331                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3332                                  PIPE_CONFIG(ADDR_SURF_P2) |
3333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3335                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3336                                  PIPE_CONFIG(ADDR_SURF_P2) |
3337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3339                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3340                                  PIPE_CONFIG(ADDR_SURF_P2) |
3341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3343                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3344                                  PIPE_CONFIG(ADDR_SURF_P2) |
3345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3347
3348                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3349                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3350                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3351                                 NUM_BANKS(ADDR_SURF_8_BANK));
3352                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3353                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3354                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355                                 NUM_BANKS(ADDR_SURF_8_BANK));
3356                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3357                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3358                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3359                                 NUM_BANKS(ADDR_SURF_8_BANK));
3360                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3361                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3362                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3363                                 NUM_BANKS(ADDR_SURF_8_BANK));
3364                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3365                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3366                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3367                                 NUM_BANKS(ADDR_SURF_8_BANK));
3368                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3370                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3371                                 NUM_BANKS(ADDR_SURF_8_BANK));
3372                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3373                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3374                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3375                                 NUM_BANKS(ADDR_SURF_8_BANK));
3376                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3377                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3378                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379                                 NUM_BANKS(ADDR_SURF_16_BANK));
3380                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3381                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3382                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3383                                 NUM_BANKS(ADDR_SURF_16_BANK));
3384                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3385                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3386                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3387                                  NUM_BANKS(ADDR_SURF_16_BANK));
3388                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3389                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3390                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3391                                  NUM_BANKS(ADDR_SURF_16_BANK));
3392                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3394                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3395                                  NUM_BANKS(ADDR_SURF_16_BANK));
3396                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3397                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3398                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3399                                  NUM_BANKS(ADDR_SURF_16_BANK));
3400                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3401                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3402                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3403                                  NUM_BANKS(ADDR_SURF_8_BANK));
3404
3405                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3406                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3407                             reg_offset != 23)
3408                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3409
3410                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3411                         if (reg_offset != 7)
3412                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3413
3414                 break;
3415         }
3416 }
3417
3418 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3419                                   u32 se_num, u32 sh_num, u32 instance)
3420 {
3421         u32 data;
3422
3423         if (instance == 0xffffffff)
3424                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3425         else
3426                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3427
3428         if (se_num == 0xffffffff)
3429                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3430         else
3431                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3432
3433         if (sh_num == 0xffffffff)
3434                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3435         else
3436                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3437
3438         WREG32(mmGRBM_GFX_INDEX, data);
3439 }
3440
/*
 * Route subsequent register accesses to the given ME/pipe/queue via SRBM
 * indexing.  The final argument (0) is presumably vmid 0 — confirm against
 * vi_srbm_select()'s prototype in vi.c.
 */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q)
{
	vi_srbm_select(adev, me, pipe, q, 0);
}
3446
3447 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3448 {
3449         u32 data, mask;
3450
3451         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3452                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3453
3454         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3455
3456         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3457                                          adev->gfx.config.max_sh_per_se);
3458
3459         return (~data) & mask;
3460 }
3461
/*
 * OR the per-ASIC default rasterizer mapping into *rconf (PA_SC_RASTER_CONFIG)
 * and *rconf1 (PA_SC_RASTER_CONFIG_1).  The field values encode how render
 * backends, packers and shader engines are mapped for each chip's physical
 * layout; existing bits in the caller-provided values are preserved.
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_VEGAM:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		/* Single RB, nothing to remap. */
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3502
/*
 * Reprogram PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 per shader engine
 * when some render backends (RBs) have been harvested (fused off).
 *
 * @raster_config / @raster_config_1: ideal (unharvested) register values
 * @rb_mask: bitmask of RBs that are actually present
 * @num_rb: RB count the ideal config was computed for
 *
 * For every SE, map fields that would route work to a missing RB are
 * redirected to the surviving partner, then the adjusted value is written
 * with GRBM_GFX_INDEX pointed at that SE only.  Caller is expected to hold
 * adev->grbm_idx_mutex (this function reprograms GRBM_GFX_INDEX).
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Per-SE slice of the global RB mask (hardware supports up to 4 SEs). */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If an entire SE pair is dead, retarget SE_PAIR_MAP at the live pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx is the even member of this SE's pair. */
		int idx = (se / 2) * 2;

		/* SE_MAP: steer away from an SE whose RBs are all gone. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* PKR_MAP: same treatment at packer granularity. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* RB_MAP_PKR0/PKR1: finally steer within each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3611
/*
 * Discover which render backends are active, program the rasterizer config
 * accordingly (with per-SE harvesting fixups when RBs are missing), and
 * cache the resulting per-SE/SH register values for userspace queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	/* Bits each SH contributes to the global active-RB bitmap. */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Build the global active-RB bitmap, one SE/SH at a time. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* Back to broadcast mode. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* All RBs present (or none detected): write the ideal config as-is;
	 * otherwise reroute mappings around the harvested RBs. */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3668
/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs' SH_MEM registers
 *
 * @adev: amdgpu_device pointer
 *
 * Program the SH_MEM aperture/config registers for the VMIDs reserved for
 * compute use (VMIDs 8..15).
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, cache-coherent
	 * default mtype, private memory through the ATC. */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* Restore default SRBM indexing (vmid 0). */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3713
3714 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3715 {
3716         switch (adev->asic_type) {
3717         default:
3718                 adev->gfx.config.double_offchip_lds_buf = 1;
3719                 break;
3720         case CHIP_CARRIZO:
3721         case CHIP_STONEY:
3722                 adev->gfx.config.double_offchip_lds_buf = 0;
3723                 break;
3724         }
3725 }
3726
/*
 * One-time programming of GRBM/SH_MEM/PA_SC "golden" state: address config,
 * tiling tables, RB setup, per-VMID shader memory config, FIFO sizes and
 * SPI arbitration priorities.
 */
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* vmid 0: uncached default mtype, bases at 0 */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* other vmids: non-coherent default mtype, base from
			 * the shared aperture (top 16 bits of its address) */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3808
/*
 * Poll until the RLC serdes masters report idle: first the CU masters on
 * every SE/SH individually, then the non-CU masters (SE/GC/TC0/TC1).
 * Each poll busy-waits up to adev->usec_timeout microseconds; a CU-master
 * timeout is only logged, and the non-CU poll gives up silently.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* Restore broadcast indexing before bailing. */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Now wait for the chip-global (non-CU) serdes masters. */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3846
3847 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3848                                                bool enable)
3849 {
3850         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3851
3852         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3853         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3854         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3855         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3856
3857         WREG32(mmCP_INT_CNTL_RING0, tmp);
3858 }
3859
3860 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3861 {
3862         /* csib */
3863         WREG32(mmRLC_CSIB_ADDR_HI,
3864                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3865         WREG32(mmRLC_CSIB_ADDR_LO,
3866                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3867         WREG32(mmRLC_CSIB_LENGTH,
3868                         adev->gfx.rlc.clear_state_size);
3869 }
3870
/*
 * Scan the RLC register-list-format blob (dwords @ind_offset..@list_size)
 * and rewrite it in place:
 *  - records the starting dword of every record in @ind_start_offsets
 *    (records are terminated by a 0xFFFFFFFF sentinel dword);
 *  - deduplicates the value found two dwords into each tuple into
 *    @unique_indices, replacing it in the blob with its index in that
 *    table.  (Looks like each record entry is a 3-dword tuple whose third
 *    dword is the register index — confirm against the RLC firmware spec.)
 *
 * *indices_count and *offset_count are cumulative outputs; BUG_ON guards
 * the caller-provided fixed array capacities @max_indices/@max_offset.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		/* First dword of a record: remember where it starts. */
		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* Sentinel ends the current record. */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* Skip to the third dword of the tuple. */
		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* Not seen before: append to the unique table. */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* Replace the raw value with its table index. */
		register_list_format[ind_offset] = indices;
	}
}
3920
3921 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3922 {
3923         int i, temp, data;
3924         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3925         int indices_count = 0;
3926         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3927         int offset_count = 0;
3928
3929         int list_size;
3930         unsigned int *register_list_format =
3931                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3932         if (!register_list_format)
3933                 return -ENOMEM;
3934         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3935                         adev->gfx.rlc.reg_list_format_size_bytes);
3936
3937         gfx_v8_0_parse_ind_reg_list(register_list_format,
3938                                 RLC_FormatDirectRegListLength,
3939                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3940                                 unique_indices,
3941                                 &indices_count,
3942                                 ARRAY_SIZE(unique_indices),
3943                                 indirect_start_offsets,
3944                                 &offset_count,
3945                                 ARRAY_SIZE(indirect_start_offsets));
3946
3947         /* save and restore list */
3948         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3949
3950         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3951         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3952                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3953
3954         /* indirect list */
3955         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3956         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3957                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3958
3959         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3960         list_size = list_size >> 1;
3961         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3962         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3963
3964         /* starting offsets starts */
3965         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3966                 adev->gfx.rlc.starting_offsets_start);
3967         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3968                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3969                                 indirect_start_offsets[i]);
3970
3971         /* unique indices */
3972         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3973         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3974         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3975                 if (unique_indices[i] != 0) {
3976                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3977                         WREG32(data + i, unique_indices[i] >> 20);
3978                 }
3979         }
3980         kfree(register_list_format);
3981
3982         return 0;
3983 }
3984
/* Turn on the RLC save/restore machine (SRM). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3989
/*
 * Program the RLC power-gating timing parameters: WPTR poll interval,
 * power up/down / command-propagation / memory-sleep delays, serdes
 * command delay and the GFX-idle threshold for GRBM register save.
 * The magic values are hardware tuning constants — see the matching
 * golden settings in the register spec before changing them.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4006
/* Toggle SMU clock slow-down while powering blocks up (CZ/ST PG path). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4012
/* Toggle SMU clock slow-down while powering blocks down (CZ/ST PG path). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4018
/* Enable CP power gating.  Note the register field is a *disable* bit,
 * hence the inverted value. */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4023
/* One-time power-gating initialization.  Carrizo/Stoney additionally
 * need the RLC jump-table address and the always-on CU mask programmed;
 * Polaris11/12 and VegaM only need the CSB, the save/restore list and
 * machine, and the power-gating timing parameters.  Other ASICs get no
 * PG setup here.
 */
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12) ||
		   (adev->asic_type == CHIP_VEGAM)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}

}
4044
/* Halt the RLC microcontroller, mask GUI idle interrupts and wait for
 * the RLC serdes to drain so it is safe to reset or reload the RLC.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4052
/* Pulse the RLC soft reset, allowing 50us for the reset to assert and
 * another 50us for the block to settle after release.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4061
/* Release the RLC microcontroller and re-enable GUI idle interrupts
 * (APUs defer interrupt enablement until after CP init).
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	/* give the RLC some time to come up */
	udelay(50);
}
4072
/* Restart the RLC.  Under SR-IOV the host owns the RLC, so the guest
 * only (re)programs the clear-state buffer.  On bare metal: stop,
 * soft-reset, reinitialize power gating, then start the RLC again.
 *
 * Returns 0 (cannot fail).
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev)) {
		gfx_v8_0_init_csb(adev);
		return 0;
	}

	adev->gfx.rlc.funcs->stop(adev);
	adev->gfx.rlc.funcs->reset(adev);
	gfx_v8_0_init_pg(adev);
	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
4087
/* Run or halt the three gfx CP microengines (ME/PFP/CE).  When halting,
 * also mark every gfx ring as unschedulable so no new jobs are pushed.
 */
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].sched.ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	/* give the halt/run request time to take effect */
	udelay(50);
}
4107
4108 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4109 {
4110         u32 count = 0;
4111         const struct cs_section_def *sect = NULL;
4112         const struct cs_extent_def *ext = NULL;
4113
4114         /* begin clear state */
4115         count += 2;
4116         /* context control state */
4117         count += 3;
4118
4119         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4120                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4121                         if (sect->id == SECT_CONTEXT)
4122                                 count += 2 + ext->reg_count;
4123                         else
4124                                 return 0;
4125                 }
4126         }
4127         /* pa_sc_raster_config/pa_sc_raster_config1 */
4128         count += 4;
4129         /* end clear state */
4130         count += 2;
4131         /* clear state */
4132         count += 2;
4133
4134         return count;
4135 }
4136
/* Bring up the gfx CP and emit the initial clear-state PM4 stream on
 * gfx ring 0: preamble begin, context control, the clear-state register
 * extents from vi_cs_data, the raster config pair, preamble end,
 * CLEAR_STATE, and the CE partition bases.  The ring allocation size
 * must match gfx_v8_0_get_csb_size() (+4 for SET_BASE).
 *
 * Returns 0 on success or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* replay the static clear-state register extents */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/* Configure the gfx ring's doorbell: enable it (with the ring's
 * doorbell offset) or disable it, then program the CP doorbell
 * address range on dGPUs.  Iceland (Topaz) has no gfx doorbells at
 * all, and APUs skip the range programming.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* APUs don't program the doorbell range */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					adev->doorbell_index.gfx_ring0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4233
/* Program gfx ring 0's ring buffer registers (size, pointers, writeback
 * addresses, base address and doorbell), then start the ring by
 * emitting the clear-state stream and mark it schedulable.
 *
 * Returns 0 (cannot fail).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA is set temporarily so the rptr can be cleared
	 */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* restore CNTL without RB_RPTR_WR_ENA */
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->sched.ready = true;

	return 0;
}
4287
/* Run or halt the two compute microengines (MEC ME1/ME2).  When
 * halting, also mark all compute rings and the KIQ ring unschedulable.
 */
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].sched.ready = false;
		adev->gfx.kiq.ring.sched.ready = false;
	}
	/* give the halt/run request time to take effect */
	udelay(50);
}
4302
4303 /* KIQ functions */
/* KIQ functions */
/* Tell the RLC which me/pipe/queue is the kernel interface queue by
 * encoding it into the low byte of RLC_CP_SCHEDULERS, then re-writing
 * with bit 7 set (presumably a "valid/activate" flag for the encoded
 * queue — NOTE(review): confirm against RLC docs).
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4317
/* Use the KIQ to bring all compute queues online: build the queue mask
 * from the MEC queue bitmap, submit a SET_RESOURCES packet, then a
 * MAP_QUEUES packet per compute ring with its MQD and wptr addresses.
 *
 * Returns 0 on success or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	/* 8 dwords per MAP_QUEUES packet + 8 for SET_RESOURCES */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}

	amdgpu_ring_commit(kiq_ring);

	return 0;
}
4378
/* Deactivate the currently selected hardware queue descriptor: if the
 * HQD is active, issue a dequeue request of type @req and poll up to
 * adev->usec_timeout microseconds for it to go inactive, then clear the
 * request and queue pointers.  Assumes the caller has selected the
 * target me/pipe/queue via vi_srbm_select() — confirm at call sites.
 *
 * Returns 0 on success, -ETIMEDOUT if the HQD stayed active.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4399
/* Fill in the memory queue descriptor (MQD) for a compute/KIQ ring.
 * The MQD is the CP's in-memory image of the CP_HQD_* registers; it is
 * either written to hardware by gfx_v8_0_mqd_commit() or handed to the
 * KIQ in a MAP_QUEUES packet.  Current hardware register values are
 * read back for the fields this function does not fully own.
 *
 * Returns 0 (cannot fail).
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* enable all threads on all SEs by default */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* dynamic CU mask lives in the same allocation, after the MQD */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot current hardware values */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4542
/* Write an MQD image to the hardware HQD registers of the currently
 * selected queue.  Registers are programmed in three passes so that
 * CP_HQD_ACTIVE is written last, and the EOP RPTR/WPTR registers are
 * skipped on Tonga (errata, see below).  Assumes the caller holds
 * srbm_mutex and has selected the queue via vi_srbm_select().
 *
 * Returns 0 (cannot fail).
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4579
/* Initialize the KIQ queue.  On GPU reset, restore the MQD from the
 * CPU-side backup and re-commit it; otherwise build a fresh MQD,
 * commit it, and save a backup copy for future resets.  The KIQ MQD is
 * always committed directly by the driver (unlike KCQs, which the KIQ
 * maps itself).
 *
 * Returns 0 (cannot fail).
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a CPU-side backup so GPU reset can restore it */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4618
/* Initialize a compute queue's MQD.  First init builds a fresh MQD and
 * saves a backup; GPU reset restores the backup and clears the ring;
 * suspend/resume only clears the ring.  Unlike the KIQ path, nothing is
 * committed to hardware here — the KIQ maps the queue later via
 * gfx_v8_0_kiq_kcq_enable().
 *
 * Returns 0 (cannot fail).
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a CPU-side backup so GPU reset can restore it */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4649
/* Program the MEC doorbell aperture (KIQ through mec_ring7) on ASICs
 * newer than Tonga, then enable CP doorbell processing for all ASICs.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4659
4660 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4661 {
4662         struct amdgpu_ring *ring;
4663         int r;
4664
4665         ring = &adev->gfx.kiq.ring;
4666
4667         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4668         if (unlikely(r != 0))
4669                 return r;
4670
4671         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4672         if (unlikely(r != 0))
4673                 return r;
4674
4675         gfx_v8_0_kiq_init_queue(ring);
4676         amdgpu_bo_kunmap(ring->mqd_obj);
4677         ring->mqd_ptr = NULL;
4678         amdgpu_bo_unreserve(ring->mqd_obj);
4679         ring->sched.ready = true;
4680         return 0;
4681 }
4682
4683 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4684 {
4685         struct amdgpu_ring *ring = NULL;
4686         int r = 0, i;
4687
4688         gfx_v8_0_cp_compute_enable(adev, true);
4689
4690         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4691                 ring = &adev->gfx.compute_ring[i];
4692
4693                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4694                 if (unlikely(r != 0))
4695                         goto done;
4696                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4697                 if (!r) {
4698                         r = gfx_v8_0_kcq_init_queue(ring);
4699                         amdgpu_bo_kunmap(ring->mqd_obj);
4700                         ring->mqd_ptr = NULL;
4701                 }
4702                 amdgpu_bo_unreserve(ring->mqd_obj);
4703                 if (r)
4704                         goto done;
4705         }
4706
4707         gfx_v8_0_set_mec_doorbell_range(adev);
4708
4709         r = gfx_v8_0_kiq_kcq_enable(adev);
4710         if (r)
4711                 goto done;
4712
4713 done:
4714         return r;
4715 }
4716
/* Run ring tests on all CP rings after resume.  Gfx and KIQ test
 * failures are fatal; compute ring results are intentionally ignored
 * here (presumably amdgpu_ring_test_helper() already marks a failing
 * ring unschedulable — confirm in amdgpu_ring.c).
 *
 * Returns 0 on success or the gfx/KIQ ring test error.
 */
static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	/* collect all the ring_tests here, gfx, kiq, compute */
	ring = &adev->gfx.gfx_ring[0];
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	ring = &adev->gfx.kiq.ring;
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		amdgpu_ring_test_helper(ring);
	}

	return 0;
}
4740
/* Resume the whole command processor: KIQ first (it maps the KCQs),
 * then the gfx ring, then the compute queues, then test every ring.
 * GUI idle interrupts are masked during bring-up on dGPUs and
 * re-enabled at the end.
 *
 * Returns 0 on success or the first error encountered.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kcq_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_test_all_rings(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
4768
/* Run or halt both the gfx and compute command processors together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4774
/* IP-block hw_init hook: apply golden register settings, program the
 * GFX constants, bring up the RLC, then resume the command processor.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_constants_init(adev);

	r = adev->gfx.rlc.funcs->resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}
4791
4792 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4793 {
4794         int r, i;
4795         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4796
4797         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4798         if (r)
4799                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4800
4801         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4802                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4803
4804                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4805                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4806                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4807                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4808                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4809                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4810                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4811                 amdgpu_ring_write(kiq_ring, 0);
4812                 amdgpu_ring_write(kiq_ring, 0);
4813                 amdgpu_ring_write(kiq_ring, 0);
4814         }
4815         r = amdgpu_ring_test_helper(kiq_ring);
4816         if (r)
4817                 DRM_ERROR("KCQ disable failed\n");
4818
4819         return r;
4820 }
4821
4822 static bool gfx_v8_0_is_idle(void *handle)
4823 {
4824         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4825
4826         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4827                 || RREG32(mmGRBM_STATUS2) != 0x8)
4828                 return false;
4829         else
4830                 return true;
4831 }
4832
4833 static bool gfx_v8_0_rlc_is_idle(void *handle)
4834 {
4835         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4836
4837         if (RREG32(mmGRBM_STATUS2) != 0x8)
4838                 return false;
4839         else
4840                 return true;
4841 }
4842
4843 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4844 {
4845         unsigned int i;
4846         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4847
4848         for (i = 0; i < adev->usec_timeout; i++) {
4849                 if (gfx_v8_0_rlc_is_idle(handle))
4850                         return 0;
4851
4852                 udelay(1);
4853         }
4854         return -ETIMEDOUT;
4855 }
4856
4857 static int gfx_v8_0_wait_for_idle(void *handle)
4858 {
4859         unsigned int i;
4860         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4861
4862         for (i = 0; i < adev->usec_timeout; i++) {
4863                 if (gfx_v8_0_is_idle(handle))
4864                         return 0;
4865
4866                 udelay(1);
4867         }
4868         return -ETIMEDOUT;
4869 }
4870
/* IP-block hw_fini: drop IRQ references, unmap the kernel compute
 * queues while the CPC can still reach their memory, then (bare metal
 * only) halt the CP and RLC under RLC safe mode.  SR-IOV guests stop
 * after the KCQ unmap — the host owns CP/RLC there.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

        amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

        amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

        /* disable KCQ to avoid CPC touch memory not valid anymore */
        gfx_v8_0_kcq_disable(adev);

        if (amdgpu_sriov_vf(adev)) {
                pr_debug("For SRIOV client, shouldn't do anything.\n");
                return 0;
        }
        /* halt CP/RLC only once each reports idle; otherwise just warn */
        amdgpu_gfx_rlc_enter_safe_mode(adev);
        if (!gfx_v8_0_wait_for_idle(adev))
                gfx_v8_0_cp_enable(adev, false);
        else
                pr_err("cp is busy, skip halt cp\n");
        if (!gfx_v8_0_wait_for_rlc_idle(adev))
                adev->gfx.rlc.funcs->stop(adev);
        else
                pr_err("rlc is busy, skip halt rlc\n");
        amdgpu_gfx_rlc_exit_safe_mode(adev);
        return 0;
}
4901
/* PM suspend for this IP block is identical to hw_fini. */
static int gfx_v8_0_suspend(void *handle)
{
        return gfx_v8_0_hw_fini(handle);
}
4906
/* PM resume for this IP block is identical to hw_init. */
static int gfx_v8_0_resume(void *handle)
{
        return gfx_v8_0_hw_init(handle);
}
4911
/* Decode the GRBM/SRBM status registers into the GRBM and SRBM
 * soft-reset masks needed to recover any busy block, and cache them in
 * adev->gfx.{grbm,srbm}_soft_reset for the pre/soft/post reset hooks.
 * Returns true when a soft reset is needed, false when everything is
 * idle (in which case both cached masks are cleared).
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        /* GRBM_STATUS */
        tmp = RREG32(mmGRBM_STATUS);
        /* any busy gfx pipeline stage -> reset CP + GFX (and GRBM via SRBM) */
        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
                   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        }

        /* GRBM_STATUS2 */
        tmp = RREG32(mmGRBM_STATUS2);
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

        /* any busy CP micro-engine (fetcher/compute/gfx) -> reset all three */
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPF, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPC, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPG, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
                                                SOFT_RESET_GRBM, 1);
        }

        /* SRBM_STATUS */
        tmp = RREG32(mmSRBM_STATUS);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

        if (grbm_soft_reset || srbm_soft_reset) {
                adev->gfx.grbm_soft_reset = grbm_soft_reset;
                adev->gfx.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->gfx.grbm_soft_reset = 0;
                adev->gfx.srbm_soft_reset = 0;
                return false;
        }
}
4973
4974 static int gfx_v8_0_pre_soft_reset(void *handle)
4975 {
4976         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4977         u32 grbm_soft_reset = 0;
4978
4979         if ((!adev->gfx.grbm_soft_reset) &&
4980             (!adev->gfx.srbm_soft_reset))
4981                 return 0;
4982
4983         grbm_soft_reset = adev->gfx.grbm_soft_reset;
4984
4985         /* stop the rlc */
4986         adev->gfx.rlc.funcs->stop(adev);
4987
4988         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4989             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4990                 /* Disable GFX parsing/prefetching */
4991                 gfx_v8_0_cp_gfx_enable(adev, false);
4992
4993         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4994             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
4995             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
4996             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
4997                 int i;
4998
4999                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5000                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5001
5002                         mutex_lock(&adev->srbm_mutex);
5003                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5004                         gfx_v8_0_deactivate_hqd(adev, 2);
5005                         vi_srbm_select(adev, 0, 0, 0, 0);
5006                         mutex_unlock(&adev->srbm_mutex);
5007                 }
5008                 /* Disable MEC parsing/prefetching */
5009                 gfx_v8_0_cp_compute_enable(adev, false);
5010         }
5011
5012        return 0;
5013 }
5014
/* Perform the actual soft reset using the masks cached by
 * gfx_v8_0_check_soft_reset(): stall/clear GFX via GMCON_DEBUG, pulse
 * the GRBM and SRBM soft-reset bits (assert, wait ~50us, de-assert),
 * then release the GMCON stall.  Always returns 0; a no-op when both
 * cached masks are zero.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        /* stall GFX and clear its state while the reset is applied */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
                WREG32(mmGMCON_DEBUG, tmp);
                udelay(50);
        }

        if (grbm_soft_reset) {
                tmp = RREG32(mmGRBM_SOFT_RESET);
                tmp |= grbm_soft_reset;
                dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);        /* read back */

                udelay(50);

                /* de-assert the reset bits */
                tmp &= ~grbm_soft_reset;
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);        /* read back */
        }

        if (srbm_soft_reset) {
                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);        /* read back */

                udelay(50);

                /* de-assert the reset bits */
                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);        /* read back */
        }

        /* release the GMCON stall/clear */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
                WREG32(mmGMCON_DEBUG, tmp);
        }

        /* Wait a little for things to settle down */
        udelay(50);

        return 0;
}
5076
/* Bring the GPU back after a soft reset, based on the masks cached by
 * gfx_v8_0_check_soft_reset(): re-deactivate compute HQDs and resume
 * KIQ/KCQ when the compute CP was reset, resume the gfx CP when the
 * CP/GFX blocks were reset, ring-test everything, then restart the RLC.
 * Always returns 0; a no-op when both cached masks are zero.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0;

        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
                int i;

                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                        /* select this queue's SRBM slot, tear down its HQD,
                         * then restore the default selection */
                        mutex_lock(&adev->srbm_mutex);
                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                        gfx_v8_0_deactivate_hqd(adev, 2);
                        vi_srbm_select(adev, 0, 0, 0, 0);
                        mutex_unlock(&adev->srbm_mutex);
                }
                /* remap the compute queues: KIQ first, then the KCQs */
                gfx_v8_0_kiq_resume(adev);
                gfx_v8_0_kcq_resume(adev);
        }

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
                gfx_v8_0_cp_gfx_resume(adev);

        gfx_v8_0_cp_test_all_rings(adev);

        adev->gfx.rlc.funcs->start(adev);

        return 0;
}
5117
5118 /**
5119  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5120  *
5121  * @adev: amdgpu_device pointer
5122  *
5123  * Fetches a GPU clock counter snapshot.
5124  * Returns the 64 bit clock counter snapshot.
5125  */
5126 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5127 {
5128         uint64_t clock;
5129
5130         mutex_lock(&adev->gfx.gpu_clock_mutex);
5131         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5132         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5133                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5134         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5135         return clock;
5136 }
5137
/* Emit four PACKET3_WRITE_DATA packets that program this vmid's GDS
 * base/size, GWS and OA allocations through the per-vmid register
 * offsets in amdgpu_gds_reg_offset.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
                                          uint32_t vmid,
                                          uint32_t gds_base, uint32_t gds_size,
                                          uint32_t gws_base, uint32_t gws_size,
                                          uint32_t oa_base, uint32_t oa_size)
{
        /* GDS Base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_base);

        /* GDS Size */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_size);

        /* GWS */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

        /* OA: build a contiguous bitmask of oa_size bits starting at oa_base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5176
5177 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5178 {
5179         WREG32(mmSQ_IND_INDEX,
5180                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5181                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5182                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5183                 (SQ_IND_INDEX__FORCE_READ_MASK));
5184         return RREG32(mmSQ_IND_DATA);
5185 }
5186
5187 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5188                            uint32_t wave, uint32_t thread,
5189                            uint32_t regno, uint32_t num, uint32_t *out)
5190 {
5191         WREG32(mmSQ_IND_INDEX,
5192                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5193                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5194                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5195                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5196                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5197                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5198         while (num--)
5199                 *(out++) = RREG32(mmSQ_IND_DATA);
5200 }
5201
/* Dump the status registers of one wave into @dst and advance
 * *@no_fields by the number of entries written.  The field order is a
 * fixed "type 0" layout (leading 0 marker, then STATUS, PC, EXEC, ...)
 * that consumers of the dump rely on — do not reorder.
 */
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
        /* type 0 wave data */
        dst[(*no_fields)++] = 0;
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
5225
5226 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5227                                      uint32_t wave, uint32_t start,
5228                                      uint32_t size, uint32_t *dst)
5229 {
5230         wave_read_regs(
5231                 adev, simd, wave, 0,
5232                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5233 }
5234
5235
/* GFX helper callbacks exposed to the rest of the driver via
 * adev->gfx.funcs (installed in gfx_v8_0_early_init).
 */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
        .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
        .select_se_sh = &gfx_v8_0_select_se_sh,
        .read_wave_data = &gfx_v8_0_read_wave_data,
        .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
        .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5243
/* IP-block early_init: set ring counts and install the gfx/ring/irq/
 * gds/rlc function tables before any hardware is touched.
 * Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
        adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
        adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
        gfx_v8_0_set_ring_funcs(adev);
        gfx_v8_0_set_irq_funcs(adev);
        gfx_v8_0_set_gds_init(adev);
        gfx_v8_0_set_rlc_funcs(adev);

        return 0;
}
5258
/* IP-block late_init: take references on the gfx interrupt sources and
 * run the EDC GPR workarounds (which need the IB pool, hence late
 * init).  Returns 0 on success or the first failing step's error.
 */
static int gfx_v8_0_late_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
        if (r)
                return r;

        r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
        if (r)
                return r;

        /* requires IBs so do in late init after IB pool is initialized */
        r = gfx_v8_0_do_edc_gpr_workarounds(adev);
        if (r)
                return r;

        r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
        if (r) {
                DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
                return r;
        }

        r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
        if (r) {
                DRM_ERROR(
                        "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
                        r);
                return r;
        }

        return 0;
}
5293
5294 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5295                                                        bool enable)
5296 {
5297         if (((adev->asic_type == CHIP_POLARIS11) ||
5298             (adev->asic_type == CHIP_POLARIS12) ||
5299             (adev->asic_type == CHIP_VEGAM)) &&
5300             adev->powerplay.pp_funcs->set_powergating_by_smu)
5301                 /* Send msg to SMU via Powerplay */
5302                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5303
5304         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5305 }
5306
/* Enable/disable dynamic per-CU medium-grain power gating. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
                                                        bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5312
/* Enable/disable quick medium-grain power gating (Polaris11 family). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
                bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5318
/* Enable/disable coarse-grain GFX power gating (Carrizo/Stoney path). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
                                          bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5324
/* Enable/disable GFX pipeline power gating (Carrizo/Stoney path). */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
                                                bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

        /* Read any GFX register to wake up GFX. */
        if (!enable)
                RREG32(mmDB_RENDER_CONTROL);
}
5334
5335 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5336                                           bool enable)
5337 {
5338         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5339                 cz_enable_gfx_cg_power_gating(adev, true);
5340                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5341                         cz_enable_gfx_pipeline_power_gating(adev, true);
5342         } else {
5343                 cz_enable_gfx_cg_power_gating(adev, false);
5344                 cz_enable_gfx_pipeline_power_gating(adev, false);
5345         }
5346 }
5347
/* IP-block set_powergating_state: apply per-ASIC power-gating policy.
 * The RLC safe mode bracket is only entered when one of the affected
 * PG features is supported.  No-op (returns 0) under SR-IOV.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_PG_STATE_GATE);

        if (amdgpu_sriov_vf(adev))
                return 0;

        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
                                AMD_PG_SUPPORT_RLC_SMU_HS |
                                AMD_PG_SUPPORT_CP |
                                AMD_PG_SUPPORT_GFX_DMG))
                amdgpu_gfx_rlc_enter_safe_mode(adev);
        switch (adev->asic_type) {
        case CHIP_CARRIZO:
        case CHIP_STONEY:

                /* SCK slow-down follows the RLC_SMU_HS capability, not
                 * the requested state */
                if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
                        cz_enable_sck_slow_down_on_power_up(adev, true);
                        cz_enable_sck_slow_down_on_power_down(adev, true);
                } else {
                        cz_enable_sck_slow_down_on_power_up(adev, false);
                        cz_enable_sck_slow_down_on_power_down(adev, false);
                }
                if (adev->pg_flags & AMD_PG_SUPPORT_CP)
                        cz_enable_cp_power_gating(adev, true);
                else
                        cz_enable_cp_power_gating(adev, false);

                cz_update_gfx_cg_power_gating(adev, enable);

                /* static and dynamic MG gating: on only when both the
                 * feature flag is set and gating was requested */
                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
                else
                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
                else
                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
        case CHIP_VEGAM:
                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
                else
                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
                else
                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
                        polaris11_enable_gfx_quick_mg_power_gating(adev, true);
                else
                        polaris11_enable_gfx_quick_mg_power_gating(adev, false);
                break;
        default:
                break;
        }
        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
                                AMD_PG_SUPPORT_RLC_SMU_HS |
                                AMD_PG_SUPPORT_CP |
                                AMD_PG_SUPPORT_GFX_DMG))
                amdgpu_gfx_rlc_exit_safe_mode(adev);
        return 0;
}
5418
/* IP-block get_clockgating_state: OR the currently-active gfx
 * clock-gating features into *flags, derived from live register reads.
 * NOTE(review): under SR-IOV *flags is zeroed but the function does not
 * return early, so the register reads below still run — confirm this
 * fall-through is intended.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int data;

        if (amdgpu_sriov_vf(adev))
                *flags = 0;

        /* AMD_CG_SUPPORT_GFX_MGCG */
        data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
        if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
                *flags |= AMD_CG_SUPPORT_GFX_MGCG;

        /* AMD_CG_SUPPORT_GFX_CGLG */
        data = RREG32(mmRLC_CGCG_CGLS_CTRL);
        if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_CGCG;

        /* AMD_CG_SUPPORT_GFX_CGLS */
        if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_CGLS;

        /* AMD_CG_SUPPORT_GFX_CGTS */
        data = RREG32(mmCGTS_SM_CTRL_REG);
        if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
                *flags |= AMD_CG_SUPPORT_GFX_CGTS;

        /* AMD_CG_SUPPORT_GFX_CGTS_LS */
        if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
                *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

        /* AMD_CG_SUPPORT_GFX_RLC_LS */
        data = RREG32(mmRLC_MEM_SLP_CNTL);
        if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

        /* AMD_CG_SUPPORT_GFX_CP_LS */
        data = RREG32(mmCP_MEM_SLP_CNTL);
        if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5460
/* Broadcast a BPM command over the RLC serdes to all CUs: select all
 * SEs/SHs, address every CU master, then compose and write the
 * RLC_SERDES_WR_CTRL word.  Stoney keeps the BPM_DATA/REG_ADDR fields
 * (they carry the command), other ASICs clear and re-set them.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
                                     uint32_t reg_addr, uint32_t cmd)
{
        uint32_t data;

        /* broadcast to every SE/SH */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

        WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
        WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

        data = RREG32(mmRLC_SERDES_WR_CTRL);
        if (adev->asic_type == CHIP_STONEY)
                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
        else
                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
                          RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
                          RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
        data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
                 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
                 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
                 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

        WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5501
5502 #define MSG_ENTER_RLC_SAFE_MODE     1
5503 #define MSG_EXIT_RLC_SAFE_MODE      0
5504 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5505 #define RLC_GPR_REG2__REQ__SHIFT 0
5506 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5507 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5508
5509 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5510 {
5511         uint32_t rlc_setting;
5512
5513         rlc_setting = RREG32(mmRLC_CNTL);
5514         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5515                 return false;
5516
5517         return true;
5518 }
5519
/*
 * Ask the RLC to enter safe mode and wait for it to acknowledge.
 * Safe mode quiesces the RLC so clock/power gating registers can be
 * reprogrammed without racing against it.
 */
static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;
	/*
	 * NOTE(review): the initial value is read from mmRLC_CNTL but the
	 * result is written to mmRLC_SAFE_MODE below (same pattern as
	 * gfx_v8_0_unset_safe_mode) — confirm this is the intended
	 * programming sequence rather than a copy/paste of the wrong
	 * register offset.
	 */
	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);	/* MESSAGE = 1: enter safe mode */
	WREG32(mmRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE */
	/* first: poll until both GFX_CLOCK_STATUS and GFX_POWER_STATUS are set */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPM_STAT) &
		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
			break;
		udelay(1);
	}
	/* then: wait for the RLC to clear CMD, acknowledging the request */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5546
/*
 * Ask the RLC to leave safe mode (MESSAGE field cleared to 0) and wait
 * for it to clear the CMD bit as acknowledgement.
 */
static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	/*
	 * NOTE(review): value is read from mmRLC_CNTL but written to
	 * mmRLC_SAFE_MODE, mirroring gfx_v8_0_set_safe_mode — confirm
	 * this asymmetry is intentional.
	 */
	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	WREG32(mmRLC_SAFE_MODE, data);

	/* wait (up to usec_timeout) for the RLC to clear CMD */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5563
/* RLC control callbacks plugged into the common amdgpu_gfx_rlc layer */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
	.set_safe_mode = gfx_v8_0_set_safe_mode,
	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
	.init = gfx_v8_0_rlc_init,
	.get_csb_size = gfx_v8_0_get_csb_size,
	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
	.resume = gfx_v8_0_rlc_resume,
	.stop = gfx_v8_0_rlc_stop,
	.reset = gfx_v8_0_rlc_reset,
	.start = gfx_v8_0_rlc_start
};
5577
/*
 * Enable or disable medium grain clock gating (MGCG) together with the
 * related light-sleep (RLC_LS/CP_LS) and CGTS tree-shade features,
 * selected by the AMD_CG_SUPPORT_GFX_* bits in adev->cg_flags.
 * The whole sequence runs inside RLC safe mode.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override bit set; dGPUs clear it too */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		/* only touch the register when something actually changed */
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* LS override is lifted only when CGTS_LS is supported too */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5681
/*
 * Enable or disable coarse grain clock gating (CGCG) and coarse grain
 * light sleep (CGLS), selected by AMD_CG_SUPPORT_GFX_CGCG/CGLS in
 * adev->cg_flags.  The whole sequence runs inside RLC safe mode.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - lift the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* and lift the CGLS override */
			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5774 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5775                                             bool enable)
5776 {
5777         if (enable) {
5778                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5779                  * ===  MGCG + MGLS + TS(CG/LS) ===
5780                  */
5781                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5782                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5783         } else {
5784                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5785                  * ===  CGCG + CGLS ===
5786                  */
5787                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5788                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5789         }
5790         return 0;
5791 }
5792
5793 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5794                                           enum amd_clockgating_state state)
5795 {
5796         uint32_t msg_id, pp_state = 0;
5797         uint32_t pp_support_state = 0;
5798
5799         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5800                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5801                         pp_support_state = PP_STATE_SUPPORT_LS;
5802                         pp_state = PP_STATE_LS;
5803                 }
5804                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5805                         pp_support_state |= PP_STATE_SUPPORT_CG;
5806                         pp_state |= PP_STATE_CG;
5807                 }
5808                 if (state == AMD_CG_STATE_UNGATE)
5809                         pp_state = 0;
5810
5811                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5812                                 PP_BLOCK_GFX_CG,
5813                                 pp_support_state,
5814                                 pp_state);
5815                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5816                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5817         }
5818
5819         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5820                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5821                         pp_support_state = PP_STATE_SUPPORT_LS;
5822                         pp_state = PP_STATE_LS;
5823                 }
5824
5825                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5826                         pp_support_state |= PP_STATE_SUPPORT_CG;
5827                         pp_state |= PP_STATE_CG;
5828                 }
5829
5830                 if (state == AMD_CG_STATE_UNGATE)
5831                         pp_state = 0;
5832
5833                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5834                                 PP_BLOCK_GFX_MG,
5835                                 pp_support_state,
5836                                 pp_state);
5837                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5838                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5839         }
5840
5841         return 0;
5842 }
5843
5844 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5845                                           enum amd_clockgating_state state)
5846 {
5847
5848         uint32_t msg_id, pp_state = 0;
5849         uint32_t pp_support_state = 0;
5850
5851         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5852                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5853                         pp_support_state = PP_STATE_SUPPORT_LS;
5854                         pp_state = PP_STATE_LS;
5855                 }
5856                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5857                         pp_support_state |= PP_STATE_SUPPORT_CG;
5858                         pp_state |= PP_STATE_CG;
5859                 }
5860                 if (state == AMD_CG_STATE_UNGATE)
5861                         pp_state = 0;
5862
5863                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5864                                 PP_BLOCK_GFX_CG,
5865                                 pp_support_state,
5866                                 pp_state);
5867                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5868                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5869         }
5870
5871         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5872                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5873                         pp_support_state = PP_STATE_SUPPORT_LS;
5874                         pp_state = PP_STATE_LS;
5875                 }
5876                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5877                         pp_support_state |= PP_STATE_SUPPORT_CG;
5878                         pp_state |= PP_STATE_CG;
5879                 }
5880                 if (state == AMD_CG_STATE_UNGATE)
5881                         pp_state = 0;
5882
5883                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5884                                 PP_BLOCK_GFX_3D,
5885                                 pp_support_state,
5886                                 pp_state);
5887                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5888                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5889         }
5890
5891         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5892                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5893                         pp_support_state = PP_STATE_SUPPORT_LS;
5894                         pp_state = PP_STATE_LS;
5895                 }
5896
5897                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5898                         pp_support_state |= PP_STATE_SUPPORT_CG;
5899                         pp_state |= PP_STATE_CG;
5900                 }
5901
5902                 if (state == AMD_CG_STATE_UNGATE)
5903                         pp_state = 0;
5904
5905                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5906                                 PP_BLOCK_GFX_MG,
5907                                 pp_support_state,
5908                                 pp_state);
5909                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5910                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5911         }
5912
5913         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5914                 pp_support_state = PP_STATE_SUPPORT_LS;
5915
5916                 if (state == AMD_CG_STATE_UNGATE)
5917                         pp_state = 0;
5918                 else
5919                         pp_state = PP_STATE_LS;
5920
5921                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5922                                 PP_BLOCK_GFX_RLC,
5923                                 pp_support_state,
5924                                 pp_state);
5925                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5926                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5927         }
5928
5929         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5930                 pp_support_state = PP_STATE_SUPPORT_LS;
5931
5932                 if (state == AMD_CG_STATE_UNGATE)
5933                         pp_state = 0;
5934                 else
5935                         pp_state = PP_STATE_LS;
5936                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5937                         PP_BLOCK_GFX_CP,
5938                         pp_support_state,
5939                         pp_state);
5940                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5941                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5942         }
5943
5944         return 0;
5945 }
5946
5947 static int gfx_v8_0_set_clockgating_state(void *handle,
5948                                           enum amd_clockgating_state state)
5949 {
5950         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5951
5952         if (amdgpu_sriov_vf(adev))
5953                 return 0;
5954
5955         switch (adev->asic_type) {
5956         case CHIP_FIJI:
5957         case CHIP_CARRIZO:
5958         case CHIP_STONEY:
5959                 gfx_v8_0_update_gfx_clock_gating(adev,
5960                                                  state == AMD_CG_STATE_GATE);
5961                 break;
5962         case CHIP_TONGA:
5963                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5964                 break;
5965         case CHIP_POLARIS10:
5966         case CHIP_POLARIS11:
5967         case CHIP_POLARIS12:
5968         case CHIP_VEGAM:
5969                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5970                 break;
5971         default:
5972                 break;
5973         }
5974         return 0;
5975 }
5976
5977 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
5978 {
5979         return ring->adev->wb.wb[ring->rptr_offs];
5980 }
5981
5982 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5983 {
5984         struct amdgpu_device *adev = ring->adev;
5985
5986         if (ring->use_doorbell)
5987                 /* XXX check if swapping is necessary on BE */
5988                 return ring->adev->wb.wb[ring->wptr_offs];
5989         else
5990                 return RREG32(mmCP_RB0_WPTR);
5991 }
5992
5993 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5994 {
5995         struct amdgpu_device *adev = ring->adev;
5996
5997         if (ring->use_doorbell) {
5998                 /* XXX check if swapping is necessary on BE */
5999                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6000                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6001         } else {
6002                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6003                 (void)RREG32(mmCP_RB0_WPTR);
6004         }
6005 }
6006
/*
 * Emit a WAIT_REG_MEM packet that requests an HDP flush and stalls the
 * ring until GPU_HDP_FLUSH_DONE reports completion for this engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		/* pick the GPU_HDP_FLUSH_DONE bit for this me/pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* no flush-done bit for other MEs; emit nothing */
			return;
		}
		reg_mem_engine = 0;	/* me */
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);	/* register to write */
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);	/* register to poll */
	amdgpu_ring_write(ring, ref_and_mask);	/* reference value */
	amdgpu_ring_write(ring, ref_and_mask);	/* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6039
/*
 * Flush the VGT: a VS partial flush event followed by a VGT_FLUSH
 * event, each emitted as an EVENT_WRITE packet.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6050
/*
 * Emit an INDIRECT_BUFFER packet on the gfx ring to launch @ib.
 * CE IBs use the *_CONST packet variant; under SR-IOV, preemptible
 * DE IBs are marked preemption-enabled and preceded by DE metadata.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
					struct amdgpu_job *job,
					struct amdgpu_ib *ib,
					uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB size in dwords, with the VMID in bits 31:24 */
	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		/* DE metadata is only emitted for DE (non-CE) IBs */
		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  /* (2 << 0) is only set on big-endian builds */
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));	/* IB base, 4-byte aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6082
/*
 * Emit an INDIRECT_BUFFER packet on a compute ring to launch @ib,
 * optionally resetting the GDS max wave ID first (see comment below).
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	/* Currently, there is a high possibility to get wave ID mismatch
	 * between ME and GDS, leading to a hw deadlock, because ME generates
	 * different wave IDs than the GDS expects. This situation happens
	 * randomly when at least 5 compute pipes use GDS ordered append.
	 * The wave IDs generated by ME are also wrong after suspend/resume.
	 * Those are probably bugs somewhere else in the kernel driver.
	 *
	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
	 * GDS to 0 for this ring (me/pipe).
	 */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				/* (2 << 0) is only set on big-endian builds */
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));	/* IB base, 4-byte aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6116
/*
 * Emit a fence on the gfx ring: an EVENT_WRITE_EOP packet that flushes
 * caches, writes @seq (32 or 64 bit, per @flags) to @addr, and
 * optionally raises an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);	/* address low, 4-byte aligned */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6137
/*
 * Emit a WAIT_REG_MEM (memory poll) that stalls the ring until the
 * fence memory reaches the last synced sequence number.  Gfx rings
 * issue the wait on the PFP engine, compute rings on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);	/* address low, 4-byte aligned */
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);		/* reference value */
	amdgpu_ring_write(ring, 0xffffffff);	/* mask */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6154
/*
 * Emit a GPU TLB flush for @vmid via the common gmc helper, wait for
 * the invalidate, and on gfx rings resync the PFP with the ME so the
 * prefetcher does not run ahead with stale state.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	/* NOTE(review): function 0 ("always") with a zero mask makes this a
	 * readback/ordering point on mmVM_INVALIDATE_REQUEST rather than a
	 * value comparison — confirm that is the intent. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6180
6181 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6182 {
6183         return ring->adev->wb.wb[ring->wptr_offs];
6184 }
6185
6186 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6187 {
6188         struct amdgpu_device *adev = ring->adev;
6189
6190         /* XXX check if swapping is necessary on BE */
6191         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6192         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6193 }
6194
6195 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6196                                            bool acquire)
6197 {
6198         struct amdgpu_device *adev = ring->adev;
6199         int pipe_num, tmp, reg;
6200         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6201
6202         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6203
6204         /* first me only has 2 entries, GFX and HP3D */
6205         if (ring->me > 0)
6206                 pipe_num -= 2;
6207
6208         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6209         tmp = RREG32(reg);
6210         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6211         WREG32(reg, tmp);
6212 }
6213
6214 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6215                                             struct amdgpu_ring *ring,
6216                                             bool acquire)
6217 {
6218         int i, pipe;
6219         bool reserve;
6220         struct amdgpu_ring *iring;
6221
6222         mutex_lock(&adev->gfx.pipe_reserve_mutex);
6223         pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6224         if (acquire)
6225                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6226         else
6227                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6228
6229         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6230                 /* Clear all reservations - everyone reacquires all resources */
6231                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6232                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6233                                                        true);
6234
6235                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6236                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6237                                                        true);
6238         } else {
6239                 /* Lower all pipes without a current reservation */
6240                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6241                         iring = &adev->gfx.gfx_ring[i];
6242                         pipe = amdgpu_gfx_queue_to_bit(adev,
6243                                                        iring->me,
6244                                                        iring->pipe,
6245                                                        0);
6246                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6247                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6248                 }
6249
6250                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6251                         iring = &adev->gfx.compute_ring[i];
6252                         pipe = amdgpu_gfx_queue_to_bit(adev,
6253                                                        iring->me,
6254                                                        iring->pipe,
6255                                                        0);
6256                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6257                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6258                 }
6259         }
6260
6261         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6262 }
6263
6264 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6265                                       struct amdgpu_ring *ring,
6266                                       bool acquire)
6267 {
6268         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6269         uint32_t queue_priority = acquire ? 0xf : 0x0;
6270
6271         mutex_lock(&adev->srbm_mutex);
6272         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6273
6274         WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6275         WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6276
6277         vi_srbm_select(adev, 0, 0, 0, 0);
6278         mutex_unlock(&adev->srbm_mutex);
6279 }
6280 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6281                                                enum drm_sched_priority priority)
6282 {
6283         struct amdgpu_device *adev = ring->adev;
6284         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6285
6286         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6287                 return;
6288
6289         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6290         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6291 }
6292
6293 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6294                                              u64 addr, u64 seq,
6295                                              unsigned flags)
6296 {
6297         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6298         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6299
6300         /* RELEASE_MEM - flush caches, send int */
6301         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6302         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6303                                  EOP_TC_ACTION_EN |
6304                                  EOP_TC_WB_ACTION_EN |
6305                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6306                                  EVENT_INDEX(5)));
6307         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6308         amdgpu_ring_write(ring, addr & 0xfffffffc);
6309         amdgpu_ring_write(ring, upper_32_bits(addr));
6310         amdgpu_ring_write(ring, lower_32_bits(seq));
6311         amdgpu_ring_write(ring, upper_32_bits(seq));
6312 }
6313
/*
 * Emit a fence on the KIQ ring using WRITE_DATA packets: one writes the
 * 32-bit sequence number to memory at @addr; if AMDGPU_FENCE_FLAG_INT is
 * set, a second one pokes CPC_INT_STATUS to raise an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6338
/* Emit a SWITCH_BUFFER packet on @ring. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6344
/*
 * Emit a CONTEXT_CONTROL packet.  The dw2 bits select which state groups
 * the CP should (re)load; under SRIOV a CE metadata packet is emitted
 * first, and a VGT flush precedes a real context switch.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6377
/*
 * Emit a COND_EXEC packet whose DW-count word is a placeholder to be
 * filled in later by gfx_v8_0_ring_emit_patch_cond_exec().  Returns the
 * ring offset of that placeholder.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6390
/*
 * Patch the COND_EXEC placeholder written by
 * gfx_v8_0_ring_emit_init_cond_exec() at ring @offset with the number of
 * DWs emitted since, accounting for ring wrap-around.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr has wrapped past the patch point */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6404
/*
 * Emit a COPY_DATA packet that copies register @reg into the writeback
 * area at adev->virt.reg_val_offs, where the driver can read the value
 * back.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6420
6421 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6422                                   uint32_t val)
6423 {
6424         uint32_t cmd;
6425
6426         switch (ring->funcs->type) {
6427         case AMDGPU_RING_TYPE_GFX:
6428                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6429                 break;
6430         case AMDGPU_RING_TYPE_KIQ:
6431                 cmd = 1 << 16; /* no inc addr */
6432                 break;
6433         default:
6434                 cmd = WR_CONFIRM;
6435                 break;
6436         }
6437
6438         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6439         amdgpu_ring_write(ring, cmd);
6440         amdgpu_ring_write(ring, reg);
6441         amdgpu_ring_write(ring, 0);
6442         amdgpu_ring_write(ring, val);
6443 }
6444
6445 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6446 {
6447         struct amdgpu_device *adev = ring->adev;
6448         uint32_t value = 0;
6449
6450         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6451         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6452         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6453         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6454         WREG32(mmSQ_CMD, value);
6455 }
6456
6457 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6458                                                  enum amdgpu_interrupt_state state)
6459 {
6460         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6461                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6462 }
6463
6464 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6465                                                      int me, int pipe,
6466                                                      enum amdgpu_interrupt_state state)
6467 {
6468         u32 mec_int_cntl, mec_int_cntl_reg;
6469
6470         /*
6471          * amdgpu controls only the first MEC. That's why this function only
6472          * handles the setting of interrupts for this specific MEC. All other
6473          * pipes' interrupts are set by amdkfd.
6474          */
6475
6476         if (me == 1) {
6477                 switch (pipe) {
6478                 case 0:
6479                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6480                         break;
6481                 case 1:
6482                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6483                         break;
6484                 case 2:
6485                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6486                         break;
6487                 case 3:
6488                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6489                         break;
6490                 default:
6491                         DRM_DEBUG("invalid pipe %d\n", pipe);
6492                         return;
6493                 }
6494         } else {
6495                 DRM_DEBUG("invalid me %d\n", me);
6496                 return;
6497         }
6498
6499         switch (state) {
6500         case AMDGPU_IRQ_STATE_DISABLE:
6501                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6502                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6503                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6504                 break;
6505         case AMDGPU_IRQ_STATE_ENABLE:
6506                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6507                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6508                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6509                 break;
6510         default:
6511                 break;
6512         }
6513 }
6514
6515 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6516                                              struct amdgpu_irq_src *source,
6517                                              unsigned type,
6518                                              enum amdgpu_interrupt_state state)
6519 {
6520         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6521                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6522
6523         return 0;
6524 }
6525
6526 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6527                                               struct amdgpu_irq_src *source,
6528                                               unsigned type,
6529                                               enum amdgpu_interrupt_state state)
6530 {
6531         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6532                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6533
6534         return 0;
6535 }
6536
6537 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6538                                             struct amdgpu_irq_src *src,
6539                                             unsigned type,
6540                                             enum amdgpu_interrupt_state state)
6541 {
6542         switch (type) {
6543         case AMDGPU_CP_IRQ_GFX_EOP:
6544                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6545                 break;
6546         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6547                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6548                 break;
6549         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6550                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6551                 break;
6552         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6553                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6554                 break;
6555         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6556                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6557                 break;
6558         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6559                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6560                 break;
6561         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6562                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6563                 break;
6564         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6565                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6566                 break;
6567         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6568                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6569                 break;
6570         default:
6571                 break;
6572         }
6573         return 0;
6574 }
6575
6576 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6577                                          struct amdgpu_irq_src *source,
6578                                          unsigned int type,
6579                                          enum amdgpu_interrupt_state state)
6580 {
6581         int enable_flag;
6582
6583         switch (state) {
6584         case AMDGPU_IRQ_STATE_DISABLE:
6585                 enable_flag = 0;
6586                 break;
6587
6588         case AMDGPU_IRQ_STATE_ENABLE:
6589                 enable_flag = 1;
6590                 break;
6591
6592         default:
6593                 return -EINVAL;
6594         }
6595
6596         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6597         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6598         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6599         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6600         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6601         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6602                      enable_flag);
6603         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6604                      enable_flag);
6605         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6606                      enable_flag);
6607         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6608                      enable_flag);
6609         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6610                      enable_flag);
6611         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6612                      enable_flag);
6613         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6614                      enable_flag);
6615         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6616                      enable_flag);
6617
6618         return 0;
6619 }
6620
6621 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6622                                      struct amdgpu_irq_src *source,
6623                                      unsigned int type,
6624                                      enum amdgpu_interrupt_state state)
6625 {
6626         int enable_flag;
6627
6628         switch (state) {
6629         case AMDGPU_IRQ_STATE_DISABLE:
6630                 enable_flag = 1;
6631                 break;
6632
6633         case AMDGPU_IRQ_STATE_ENABLE:
6634                 enable_flag = 0;
6635                 break;
6636
6637         default:
6638                 return -EINVAL;
6639         }
6640
6641         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6642                      enable_flag);
6643
6644         return 0;
6645 }
6646
6647 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6648                             struct amdgpu_irq_src *source,
6649                             struct amdgpu_iv_entry *entry)
6650 {
6651         int i;
6652         u8 me_id, pipe_id, queue_id;
6653         struct amdgpu_ring *ring;
6654
6655         DRM_DEBUG("IH: CP EOP\n");
6656         me_id = (entry->ring_id & 0x0c) >> 2;
6657         pipe_id = (entry->ring_id & 0x03) >> 0;
6658         queue_id = (entry->ring_id & 0x70) >> 4;
6659
6660         switch (me_id) {
6661         case 0:
6662                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6663                 break;
6664         case 1:
6665         case 2:
6666                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6667                         ring = &adev->gfx.compute_ring[i];
6668                         /* Per-queue interrupt is supported for MEC starting from VI.
6669                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6670                           */
6671                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6672                                 amdgpu_fence_process(ring);
6673                 }
6674                 break;
6675         }
6676         return 0;
6677 }
6678
6679 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6680                            struct amdgpu_iv_entry *entry)
6681 {
6682         u8 me_id, pipe_id, queue_id;
6683         struct amdgpu_ring *ring;
6684         int i;
6685
6686         me_id = (entry->ring_id & 0x0c) >> 2;
6687         pipe_id = (entry->ring_id & 0x03) >> 0;
6688         queue_id = (entry->ring_id & 0x70) >> 4;
6689
6690         switch (me_id) {
6691         case 0:
6692                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6693                 break;
6694         case 1:
6695         case 2:
6696                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6697                         ring = &adev->gfx.compute_ring[i];
6698                         if (ring->me == me_id && ring->pipe == pipe_id &&
6699                             ring->queue == queue_id)
6700                                 drm_sched_fault(&ring->sched);
6701                 }
6702                 break;
6703         }
6704 }
6705
/* IRQ handler for privileged register access violations from the CP. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	/* report a scheduler fault on the offending ring */
	gfx_v8_0_fault(adev, entry);
	return 0;
}
6714
/* IRQ handler for privileged/illegal instructions from the CP. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	/* report a scheduler fault on the offending ring */
	gfx_v8_0_fault(adev, entry);
	return 0;
}
6723
/*
 * IRQ handler for CP EDC/ECC errors: log only, no recovery attempted.
 * Fix: the log message was missing its trailing newline, which glues it
 * to the next kernel log line.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6731
6732 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6733 {
6734         u32 enc, se_id, sh_id, cu_id;
6735         char type[20];
6736         int sq_edc_source = -1;
6737
6738         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6739         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6740
6741         switch (enc) {
6742                 case 0:
6743                         DRM_INFO("SQ general purpose intr detected:"
6744                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6745                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6746                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6747                                         "wlt %d, thread_trace %d.\n",
6748                                         se_id,
6749                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6750                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6751                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6752                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6753                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6754                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6755                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6756                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6757                                         );
6758                         break;
6759                 case 1:
6760                 case 2:
6761
6762                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6763                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6764
6765                         /*
6766                          * This function can be called either directly from ISR
6767                          * or from BH in which case we can access SQ_EDC_INFO
6768                          * instance
6769                          */
6770                         if (in_task()) {
6771                                 mutex_lock(&adev->grbm_idx_mutex);
6772                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6773
6774                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6775
6776                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6777                                 mutex_unlock(&adev->grbm_idx_mutex);
6778                         }
6779
6780                         if (enc == 1)
6781                                 sprintf(type, "instruction intr");
6782                         else
6783                                 sprintf(type, "EDC/ECC error");
6784
6785                         DRM_INFO(
6786                                 "SQ %s detected: "
6787                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6788                                         "trap %s, sq_ed_info.source %s.\n",
6789                                         type, se_id, sh_id, cu_id,
6790                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6791                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6792                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6793                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6794                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6795                                 );
6796                         break;
6797                 default:
6798                         DRM_ERROR("SQ invalid encoding type\n.");
6799         }
6800 }
6801
6802 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6803 {
6804
6805         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6806         struct sq_work *sq_work = container_of(work, struct sq_work, work);
6807
6808         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6809 }
6810
6811 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6812                            struct amdgpu_irq_src *source,
6813                            struct amdgpu_iv_entry *entry)
6814 {
6815         unsigned ih_data = entry->src_data[0];
6816
6817         /*
6818          * Try to submit work so SQ_EDC_INFO can be accessed from
6819          * BH. If previous work submission hasn't finished yet
6820          * just print whatever info is possible directly from the ISR.
6821          */
6822         if (work_pending(&adev->gfx.sq_work.work)) {
6823                 gfx_v8_0_parse_sq_irq(adev, ih_data);
6824         } else {
6825                 adev->gfx.sq_work.ih_data = ih_data;
6826                 schedule_work(&adev->gfx.sq_work.work);
6827         }
6828
6829         return 0;
6830 }
6831
/* IP-block level callbacks for GFX 8 (init/fini, suspend/resume, reset,
 * clock- and power-gating).
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6852
/* Ring callbacks for the GFX (graphics) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /*	CE_META */
		31 + /*	DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
};
6897
/* Ring callbacks for the compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6927
/*
 * Ring callbacks for the KIQ ring.  The KIQ shares the compute rptr/wptr
 * helpers but uses its own fence emission and register read/write packets;
 * it has no IB submission or VM-flush callbacks.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
        .type = AMDGPU_RING_TYPE_KIQ,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        /* Worst-case dword count emitted per frame, itemized below. */
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
        .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
        .test_ring = gfx_v8_0_ring_test_ring,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_rreg = gfx_v8_0_ring_emit_rreg,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6951
6952 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6953 {
6954         int i;
6955
6956         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6957
6958         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6959                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6960
6961         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6962                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6963 }
6964
/* End-of-pipe (EOP) interrupt source: enable/disable state and handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
        .set = gfx_v8_0_set_eop_interrupt_state,
        .process = gfx_v8_0_eop_irq,
};
6969
/* Privileged register fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
        .set = gfx_v8_0_set_priv_reg_fault_state,
        .process = gfx_v8_0_priv_reg_irq,
};
6974
/* Privileged instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
        .set = gfx_v8_0_set_priv_inst_fault_state,
        .process = gfx_v8_0_priv_inst_irq,
};
6979
/* CP ECC error interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
        .set = gfx_v8_0_set_cp_ecc_int_state,
        .process = gfx_v8_0_cp_ecc_error_irq,
};
6984
/* SQ interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
        .set = gfx_v8_0_set_sq_int_state,
        .process = gfx_v8_0_sq_irq,
};
6989
/* Register the interrupt sources owned by the GFX v8 block. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
        /* One EOP interrupt type per CP queue. */
        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
        adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

        adev->gfx.priv_reg_irq.num_types = 1;
        adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

        adev->gfx.priv_inst_irq.num_types = 1;
        adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

        adev->gfx.cp_ecc_error_irq.num_types = 1;
        adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

        adev->gfx.sq_irq.num_types = 1;
        adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
7007
/* All GFX v8 variants share the iceland RLC function table. */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
        adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7012
7013 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7014 {
7015         /* init asci gds info */
7016         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7017         adev->gds.gws.total_size = 64;
7018         adev->gds.oa.total_size = 16;
7019         adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7020
7021         if (adev->gds.mem.total_size == 64 * 1024) {
7022                 adev->gds.mem.gfx_partition_size = 4096;
7023                 adev->gds.mem.cs_partition_size = 4096;
7024
7025                 adev->gds.gws.gfx_partition_size = 4;
7026                 adev->gds.gws.cs_partition_size = 4;
7027
7028                 adev->gds.oa.gfx_partition_size = 4;
7029                 adev->gds.oa.cs_partition_size = 1;
7030         } else {
7031                 adev->gds.mem.gfx_partition_size = 1024;
7032                 adev->gds.mem.cs_partition_size = 1024;
7033
7034                 adev->gds.gws.gfx_partition_size = 16;
7035                 adev->gds.gws.cs_partition_size = 16;
7036
7037                 adev->gds.oa.gfx_partition_size = 4;
7038                 adev->gds.oa.cs_partition_size = 4;
7039         }
7040 }
7041
7042 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7043                                                  u32 bitmap)
7044 {
7045         u32 data;
7046
7047         if (!bitmap)
7048                 return;
7049
7050         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7051         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7052
7053         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7054 }
7055
7056 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7057 {
7058         u32 data, mask;
7059
7060         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7061                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7062
7063         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7064
7065         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7066 }
7067
/*
 * Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active-CU count, and the "always on" (AO) CU selection.  Walks every
 * SE/SH pair via the GRBM index (hence the grbm_idx_mutex) and restores
 * broadcast mode before returning.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
        int i, j, k, counter, active_cu_number = 0;
        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
        unsigned disable_masks[4 * 2];
        u32 ao_cu_num;

        memset(cu_info, 0, sizeof(*cu_info));

        /* APUs cap the always-on CUs per SH at 2; dGPUs allow all of them. */
        if (adev->flags & AMD_IS_APU)
                ao_cu_num = 2;
        else
                ao_cu_num = adev->gfx.config.max_cu_per_sh;

        /* User-supplied disable masks cover at most 4 SEs x 2 SHs. */
        amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
                        /* Target this SE/SH pair through the GRBM index. */
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        if (i < 4 && j < 2)
                                gfx_v8_0_set_user_cu_inactive_bitmap(
                                        adev, disable_masks[i * 2 + j]);
                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
                        cu_info->bitmap[i][j] = bitmap;

                        /* Count active CUs; the first ao_cu_num of them are
                         * marked always-on. */
                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
                                if (bitmap & mask) {
                                        if (counter < ao_cu_num)
                                                ao_bitmap |= mask;
                                        counter ++;
                                }
                                mask <<= 1;
                        }
                        active_cu_number += counter;
                        /* ao_cu_mask packs 8 bits per SH; only the first
                         * 2 SEs x 2 SHs fit in the 32-bit mask. */
                        if (i < 2 && j < 2)
                                ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
                        cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
                }
        }
        /* Restore GRBM broadcast (all SEs/SHs/instances). */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        cu_info->number = active_cu_number;
        cu_info->ao_cu_mask = ao_cu_mask;
        cu_info->simd_per_cu = NUM_SIMD_PER_CU;
        /* Fixed VI capabilities below — values hard-coded here; confirm
         * against hardware documentation if reused elsewhere. */
        cu_info->max_waves_per_simd = 10;
        cu_info->max_scratch_slots_per_cu = 32;
        cu_info->wave_front_size = 64;
        cu_info->lds_size = 64;
}
7123
/* IP block registration entry for GFX v8.0. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 0,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};
7132
/* IP block registration entry for GFX v8.1 (same callbacks as v8.0). */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 1,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};
7141
7142 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7143 {
7144         uint64_t ce_payload_addr;
7145         int cnt_ce;
7146         union {
7147                 struct vi_ce_ib_state regular;
7148                 struct vi_ce_ib_state_chained_ib chained;
7149         } ce_payload = {};
7150
7151         if (ring->adev->virt.chained_ib_support) {
7152                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7153                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7154                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7155         } else {
7156                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7157                         offsetof(struct vi_gfx_meta_data, ce_payload);
7158                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7159         }
7160
7161         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7162         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7163                                 WRITE_DATA_DST_SEL(8) |
7164                                 WR_CONFIRM) |
7165                                 WRITE_DATA_CACHE_POLICY(0));
7166         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7167         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7168         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7169 }
7170
7171 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7172 {
7173         uint64_t de_payload_addr, gds_addr, csa_addr;
7174         int cnt_de;
7175         union {
7176                 struct vi_de_ib_state regular;
7177                 struct vi_de_ib_state_chained_ib chained;
7178         } de_payload = {};
7179
7180         csa_addr = amdgpu_csa_vaddr(ring->adev);
7181         gds_addr = csa_addr + 4096;
7182         if (ring->adev->virt.chained_ib_support) {
7183                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7184                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7185                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7186                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7187         } else {
7188                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7189                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7190                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7191                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7192         }
7193
7194         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7195         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7196                                 WRITE_DATA_DST_SEL(8) |
7197                                 WR_CONFIRM) |
7198                                 WRITE_DATA_CACHE_POLICY(0));
7199         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7200         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7201         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7202 }