Merge tag 'irqchip-fixes-5.6-2' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6-microblaze.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "vi.h"
33 #include "vi_structs.h"
34 #include "vid.h"
35 #include "amdgpu_ucode.h"
36 #include "amdgpu_atombios.h"
37 #include "atombios_i2c.h"
38 #include "clearstate_vi.h"
39
40 #include "gmc/gmc_8_2_d.h"
41 #include "gmc/gmc_8_2_sh_mask.h"
42
43 #include "oss/oss_3_0_d.h"
44 #include "oss/oss_3_0_sh_mask.h"
45
46 #include "bif/bif_5_0_d.h"
47 #include "bif/bif_5_0_sh_mask.h"
48 #include "gca/gfx_8_0_d.h"
49 #include "gca/gfx_8_0_enum.h"
50 #include "gca/gfx_8_0_sh_mask.h"
51
52 #include "dce/dce_10_0_d.h"
53 #include "dce/dce_10_0_sh_mask.h"
54
55 #include "smu/smu_7_1_3_d.h"
56
57 #include "ivsrcid/ivsrcid_vislands30.h"
58
/*
 * Driver-local sizing constants for GFX8.
 * NOTE(review): the unit of GFX8_MEC_HPD_SIZE is not visible in this part of
 * the file (presumably bytes) -- confirm at the point of use.
 */
59 #define GFX8_NUM_GFX_RINGS     1
60 #define GFX8_MEC_HPD_SIZE 4096
61
/*
 * GB_ADDR_CONFIG "golden" values, one per ASIC name.
 * Topaz and Carrizo share the same value (0x22010001).
 */
62 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
63 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
64 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
65 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
66
/*
 * Field-placement helpers: each macro shifts x left by the __SHIFT constant
 * of the like-named field. The first five use GB_TILE_MODE0 field shifts,
 * the last four use GB_MACROTILE_MODE0 field shifts.
 * The argument is only shifted, not masked, so callers must pass a value
 * that already fits the field.
 */
67 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
68 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
69 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
70 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
71 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
72 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
73 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
74 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
75 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
76
/*
 * Single-bit masks named after RLC_CGTT_MGCG_OVERRIDE fields, covering
 * bits 0 (CPF) through 5 (GRBM) in ascending order.
 */
77 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
78 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
79 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
80 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
81 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
82 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
83
84 /* BPM SERDES command values: 1 for "set", 0 for the CLE_ variant (presumably "clear" -- confirm at the call sites) */
85 #define SET_BPM_SERDES_CMD    1
86 #define CLE_BPM_SERDES_CMD    0
87
88 /*
 * BPM register addresses, numbered consecutively from 0.
 * BPM_REG_FGCG_MAX is the final enumerator (value 5), one past the last
 * real register address.
 */
89 enum {
90         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
91         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
92         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
93         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
94         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
95         BPM_REG_FGCG_MAX
96 };
97
/*
 * NOTE(review): by its name this is the length of an RLC "format direct"
 * register list; the unit (entries vs. dwords) is not visible here -- confirm
 * at the point of use.
 */
98 #define RLC_FormatDirectRegListLength        14
99
/*
 * Firmware image names declared via MODULE_FIRMWARE, grouped by ASIC prefix:
 * carrizo, stoney, tonga, topaz, fiji, polaris10, polaris11, polaris12, vegam.
 *
 * Every group declares ce, pfp, me, mec and rlc images. The stoney and topaz
 * groups declare no mec2 image. The three polaris groups additionally declare
 * a "_2" variant of each of ce, pfp, me, mec and mec2 (but not of rlc).
 */
100 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
101 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
103 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
104 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
105 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
108 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
110 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
111 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
114 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
116 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
117 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
118 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
121 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
123 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
124 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
125
126 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
127 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
129 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
130 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
132
133 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
143 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
144
145 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
153 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
155 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
156
157 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
165 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
166 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
167 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
168
169 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
170 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
172 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
173 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
174 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
175
/*
 * Per-VMID GDS register offsets: 16 rows, row n describing VMID n (0..15).
 * Each row lists, in order, the mmGDS_VMIDn_BASE, mmGDS_VMIDn_SIZE,
 * mmGDS_GWS_VMIDn and mmGDS_OA_VMIDn register offsets.
 * NOTE(review): the member names of struct amdgpu_gds_reg_offset are declared
 * outside this chunk; the positional order here must match that declaration.
 */
176 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
177 {
178         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
179         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
180         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
181         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
182         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
183         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
184         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
185         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
186         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
187         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
188         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
189         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
190         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
191         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
192         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
193         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
194 };
195
/*
 * Tonga "a11" golden settings: flat u32 table, three words per line.
 * NOTE(review): the words look like (register offset, mask, value) and are
 * presumably applied by a register-programming helper outside this chunk --
 * confirm before changing any entry.
 * NOTE(review): if the middle word is a mask, some values carry bits outside
 * it (e.g. mmRLC_CGCG_CGLS_CTRL: 0x00000003 vs 0x0000003c) -- confirm how the
 * consumer combines the two.
 */
196 static const u32 golden_settings_tonga_a11[] =
197 {
198         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
199         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
200         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
201         mmGB_GPU_ID, 0x0000000f, 0x00000000,
202         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
203         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
204         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
205         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
206         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
207         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
208         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
209         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
210         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
211         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
212         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
213         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
214 };
215
/*
 * Tonga common golden settings: flat u32 table, three words per line, every
 * line using 0xffffffff as its middle word.
 * NOTE(review): the words look like (register offset, mask, value) --
 * confirm against the consumer, which is outside this chunk.
 */
216 static const u32 tonga_golden_common_all[] =
217 {
218         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
219         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
220         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
221         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, /* same value as TONGA_GB_ADDR_CONFIG_GOLDEN */
222         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
223         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
224         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
225         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
226 };
227
/*
 * Tonga MGCG/CGCG init table: flat u32 table, three words per line.
 * Order of the entries: mmRLC_CGTT_MGCG_OVERRIDE (all ones), then the
 * *_CGTT_*/CGTT_*_CLK_CTRL registers, then the CGTS control registers for
 * CU0 through CU7, then CGTS_SM_CTRL_REG, CP_RB_WPTR_POLL_CNTL,
 * RLC_CGCG_CGLS_CTRL and CP_MEM_SLP_CNTL.
 * mmGRBM_GFX_INDEX is listed twice with the same value, once before each of
 * the two register groups.
 * NOTE(review): the words look like (register offset, mask, value) and the
 * entry order is presumably significant -- confirm against the consumer
 * before reordering or editing.
 */
228 static const u32 tonga_mgcg_cgcg_init[] =
229 {
230         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
231         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
232         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
233         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
234         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
236         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
237         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
238         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
239         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
240         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
241         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
242         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
244         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
245         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
246         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
247         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
248         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
249         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
250         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
251         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
252         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
253         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
254         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
255         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
256         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
257         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
258         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
259         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
260         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* CGTS control registers, CU0..CU7; CU0 and CU4 use the TA_SQC register name, the others TA */
261         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
262         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
263         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
264         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
265         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
266         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
267         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
268         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
269         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
270         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
271         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
272         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
273         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
274         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
275         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
276         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
277         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
278         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
279         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
280         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
281         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
282         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
283         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
284         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
285         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
286         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
287         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
288         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
289         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
290         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
291         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
292         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
293         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
294         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
295         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
296         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
297         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
298         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
299         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
300         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
301         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
302         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
303         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
304         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
305 };
306
/*
 * VegaM "a11" golden settings: flat u32 table, three words per line.
 * NOTE(review): the words look like (register offset, mask, value) and are
 * presumably applied by a register-programming helper outside this chunk --
 * confirm before changing any entry.
 */
307 static const u32 golden_settings_vegam_a11[] =
308 {
309         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
310         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
311         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
312         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
313         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
314         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
315         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
316         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
317         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
318         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
319         mmSQ_CONFIG, 0x07f80000, 0x01180000,
320         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
321         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
322         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
323         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
324         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
325         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
326 };
327
/*
 * VegaM common golden settings: flat u32 table, three words per line, every
 * line using 0xffffffff as its middle word. Contains no PA_SC_RASTER_CONFIG
 * entries.
 * NOTE(review): the words look like (register offset, mask, value) --
 * confirm against the consumer, which is outside this chunk.
 */
328 static const u32 vegam_golden_common_all[] =
329 {
330         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
331         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
332         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
333         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
334         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
335         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
336 };
337
/*
 * Polaris11 "a11" golden settings: flat u32 table, three words per line.
 * NOTE(review): the words look like (register offset, mask, value) and are
 * presumably applied by a register-programming helper outside this chunk --
 * confirm before changing any entry.
 */
338 static const u32 golden_settings_polaris11_a11[] =
339 {
340         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
341         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
342         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
343         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
344         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
345         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
346         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
347         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
348         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
349         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
350         mmSQ_CONFIG, 0x07f80000, 0x01180000,
351         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
352         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
353         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
354         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
355         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
356         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
357 };
358
/*
 * Polaris11 common golden settings: flat u32 table, three words per line,
 * every line using 0xffffffff as its middle word. Contains no
 * PA_SC_RASTER_CONFIG entries.
 * NOTE(review): the words look like (register offset, mask, value) --
 * confirm against the consumer, which is outside this chunk.
 */
359 static const u32 polaris11_golden_common_all[] =
360 {
361         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
362         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002, /* same value as POLARIS11_GB_ADDR_CONFIG_GOLDEN */
363         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
364         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
365         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
366         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
367 };
368
/*
 * Polaris10 "a11" golden settings: flat u32 table, three words per line.
 * NOTE(review): the words look like (register offset, mask, value) and are
 * presumably applied by a register-programming helper outside this chunk --
 * confirm before changing any entry.
 * NOTE(review): this table has an mmTCP_CHAN_STEER_HI entry but no
 * mmTCP_CHAN_STEER_LO entry, unlike golden_settings_vegam_a11 and
 * golden_settings_polaris11_a11 -- confirm that the omission is intentional.
 */
369 static const u32 golden_settings_polaris10_a11[] =
370 {
371         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
372         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
373         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
374         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
375         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
376         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
377         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
378         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
379         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
380         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
381         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
382         mmSQ_CONFIG, 0x07f80000, 0x07180000,
383         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
384         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
385         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
386         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
387         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
388 };
389
/*
 * Polaris10 common golden settings: flat u32 table, three words per line,
 * every line using 0xffffffff as its middle word.
 * NOTE(review): the words look like (register offset, mask, value) --
 * confirm against the consumer, which is outside this chunk.
 */
390 static const u32 polaris10_golden_common_all[] =
391 {
392         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
393         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
394         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
395         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
396         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
397         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
398         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
399         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
400 };
401
/*
 * Fiji common golden settings: flat u32 table, three words per line.
 * After the SPI_RESOURCE_RESERVE entries, mmGRBM_GFX_INDEX is listed a second
 * time (same value) followed by an mmSPI_CONFIG_CNTL_1 entry whose middle
 * word is 0x0000000f rather than 0xffffffff.
 * NOTE(review): the words look like (register offset, mask, value) --
 * confirm against the consumer, which is outside this chunk.
 */
402 static const u32 fiji_golden_common_all[] =
403 {
404         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
405         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
406         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
407         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
408         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
409         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
410         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
411         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
412         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
413         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
414 };
415
/*
 * Fiji "a10" golden settings: flat u32 table, three words per line.
 * NOTE(review): the words look like (register offset, mask, value) and are
 * presumably applied by a register-programming helper outside this chunk --
 * confirm before changing any entry.
 */
416 static const u32 golden_settings_fiji_a10[] =
417 {
418         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
419         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
420         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
421         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
422         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
423         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
424         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
425         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
426         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
427         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
428         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
429 };
430
/*
 * Fiji MGCG/CGCG init table: flat u32 table, three words per line.
 * Order of the entries: mmRLC_CGTT_MGCG_OVERRIDE (all ones), then the
 * *_CGTT_*/CGTT_*_CLK_CTRL registers, then CGTS_SM_CTRL_REG,
 * CP_RB_WPTR_POLL_CNTL, RLC_CGCG_CGLS_CTRL and CP_MEM_SLP_CNTL.
 * This table contains no per-CU CGTS_CUn_* entries.
 * mmGRBM_GFX_INDEX is listed twice with the same value.
 * NOTE(review): the words look like (register offset, mask, value) and the
 * entry order is presumably significant -- confirm against the consumer
 * before reordering or editing.
 */
431 static const u32 fiji_mgcg_cgcg_init[] =
432 {
433         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
434         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
435         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
436         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
440         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
442         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
444         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
452         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
453         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
455         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
456         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
458         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
459         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
460         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
461         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
462         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
463         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
464         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
465         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
466         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
467         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
468 };
469
/*
 * Iceland "a11" golden settings: flat u32 table, three words per line.
 * NOTE(review): the words look like (register offset, mask, value) and are
 * presumably applied by a register-programming helper outside this chunk --
 * confirm before changing any entry.
 */
470 static const u32 golden_settings_iceland_a11[] =
471 {
472         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
473         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
474         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
475         mmGB_GPU_ID, 0x0000000f, 0x00000000,
476         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
477         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
478         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
479         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
480         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
481         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
482         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
483         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
484         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
485         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
486         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
487         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
488 };
489
/*
 * Iceland common golden settings: flat u32 table, three words per line,
 * every line using 0xffffffff as its middle word.
 * NOTE(review): the words look like (register offset, mask, value) --
 * confirm against the consumer, which is outside this chunk.
 */
490 static const u32 iceland_golden_common_all[] =
491 {
492         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
493         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
494         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
495         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, /* same value as TOPAZ_GB_ADDR_CONFIG_GOLDEN */
496         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
497         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
498         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
499         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
500 };
501
/*
 * Iceland MGCG/CGCG init table: flat u32 table, three words per line.
 * Order of the entries: mmRLC_CGTT_MGCG_OVERRIDE (all ones), then the
 * *_CGTT_*/CGTT_*_CLK_CTRL registers, then the CGTS control registers for
 * CU0 through CU5 only, then CGTS_SM_CTRL_REG, CP_RB_WPTR_POLL_CNTL and
 * RLC_CGCG_CGLS_CTRL. There is no mmCP_MEM_SLP_CNTL entry in this table.
 * mmGRBM_GFX_INDEX is listed twice with the same value.
 * NOTE(review): the words look like (register offset, mask, value) and the
 * entry order is presumably significant -- confirm against the consumer
 * before reordering or editing.
 */
502 static const u32 iceland_mgcg_cgcg_init[] =
503 {
504         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
505         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
506         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
507         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
508         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
509         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
510         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
511         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
512         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
513         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
514         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
515         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
516         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
518         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
519         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
520         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
521         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
522         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
523         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
524         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
525         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
526         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
527         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
528         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
529         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
530         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
531         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
532         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
533         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
534         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* CGTS control registers, CU0..CU5; the CU0 and CU4 TA_SQC entries use 0x0f840f87 where the TA entries use 0x00040007 */
535         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
536         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
537         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
538         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
539         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
540         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
541         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
542         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
543         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
544         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
545         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
546         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
547         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
548         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
549         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
550         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
551         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
552         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
553         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
554         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
555         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
558         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
561         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
562         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
563         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
564         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
565         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
566         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
567         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
568 };
569
/*
 * CZ "a11" golden settings: flat u32 table, three words per line.
 * NOTE(review): the words look like (register offset, mask, value) and are
 * presumably applied by a register-programming helper outside this chunk --
 * confirm before changing any entry.
 * NOTE(review): if the middle word is a mask, the mmTCP_ADDR_CONFIG value
 * 0x000000f3 has bits outside its mask 0x0000000f -- confirm how the
 * consumer combines the two.
 */
570 static const u32 cz_golden_settings_a11[] =
571 {
572         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
573         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
574         mmGB_GPU_ID, 0x0000000f, 0x00000000,
575         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
576         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
577         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
578         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
579         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
580         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
581         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
582         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
583         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
584 };
585
/*
 * CZ common golden settings: flat u32 table, three words per line, every
 * line using 0xffffffff as its middle word.
 * NOTE(review): the words look like (register offset, mask, value) --
 * confirm against the consumer, which is outside this chunk.
 */
586 static const u32 cz_golden_common_all[] =
587 {
588         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
589         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
590         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
591         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, /* same value as CARRIZO_GB_ADDR_CONFIG_GOLDEN */
592         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
593         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
594         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
595         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
596 };
597
598 static const u32 cz_mgcg_cgcg_init[] =
599 {
600         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
601         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
602         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
606         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
608         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
609         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
610         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
611         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
615         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
616         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
617         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
619         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
620         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
621         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
622         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
623         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
624         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
625         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
626         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
627         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
628         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
629         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
630         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
631         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
632         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
633         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
634         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
635         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
636         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
637         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
638         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
639         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
640         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
641         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
642         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
643         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
644         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
645         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
646         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
647         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
648         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
649         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
650         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
651         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
652         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
653         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
654         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
655         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
656         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
657         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
658         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
659         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
660         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
661         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
662         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
663         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
664         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
665         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
666         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
667         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
668         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
669         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
670         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
671         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
672         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
673         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
674         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
675 };
676
677 static const u32 stoney_golden_settings_a11[] =
678 {
679         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
680         mmGB_GPU_ID, 0x0000000f, 0x00000000,
681         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
682         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
683         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
684         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
685         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
686         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
687         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
688         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
689 };
690
691 static const u32 stoney_golden_common_all[] =
692 {
693         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
694         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
695         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
696         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
697         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
698         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
699         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
700         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
701 };
702
703 static const u32 stoney_mgcg_cgcg_init[] =
704 {
705         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
706         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
707         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
708         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
709         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
710 };
711
712
/*
 * Human-readable descriptions of the SQ EDC error sources, one string per
 * source value starting at 0.
 * NOTE(review): presumably indexed by the SOURCE field of SQ_EDC_INFO --
 * confirm at the use site.
 */
static const char * const sq_edc_source_names[] = {
        [0] = "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
        [1] = "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
        [2] = "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
        [3] = "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
        [4] = "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
        [5] = "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
        [6] = "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
722
723 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
724 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
725 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
726 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
727 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
728 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
729 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
730 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
731
732 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
733 {
734         switch (adev->asic_type) {
735         case CHIP_TOPAZ:
736                 amdgpu_device_program_register_sequence(adev,
737                                                         iceland_mgcg_cgcg_init,
738                                                         ARRAY_SIZE(iceland_mgcg_cgcg_init));
739                 amdgpu_device_program_register_sequence(adev,
740                                                         golden_settings_iceland_a11,
741                                                         ARRAY_SIZE(golden_settings_iceland_a11));
742                 amdgpu_device_program_register_sequence(adev,
743                                                         iceland_golden_common_all,
744                                                         ARRAY_SIZE(iceland_golden_common_all));
745                 break;
746         case CHIP_FIJI:
747                 amdgpu_device_program_register_sequence(adev,
748                                                         fiji_mgcg_cgcg_init,
749                                                         ARRAY_SIZE(fiji_mgcg_cgcg_init));
750                 amdgpu_device_program_register_sequence(adev,
751                                                         golden_settings_fiji_a10,
752                                                         ARRAY_SIZE(golden_settings_fiji_a10));
753                 amdgpu_device_program_register_sequence(adev,
754                                                         fiji_golden_common_all,
755                                                         ARRAY_SIZE(fiji_golden_common_all));
756                 break;
757
758         case CHIP_TONGA:
759                 amdgpu_device_program_register_sequence(adev,
760                                                         tonga_mgcg_cgcg_init,
761                                                         ARRAY_SIZE(tonga_mgcg_cgcg_init));
762                 amdgpu_device_program_register_sequence(adev,
763                                                         golden_settings_tonga_a11,
764                                                         ARRAY_SIZE(golden_settings_tonga_a11));
765                 amdgpu_device_program_register_sequence(adev,
766                                                         tonga_golden_common_all,
767                                                         ARRAY_SIZE(tonga_golden_common_all));
768                 break;
769         case CHIP_VEGAM:
770                 amdgpu_device_program_register_sequence(adev,
771                                                         golden_settings_vegam_a11,
772                                                         ARRAY_SIZE(golden_settings_vegam_a11));
773                 amdgpu_device_program_register_sequence(adev,
774                                                         vegam_golden_common_all,
775                                                         ARRAY_SIZE(vegam_golden_common_all));
776                 break;
777         case CHIP_POLARIS11:
778         case CHIP_POLARIS12:
779                 amdgpu_device_program_register_sequence(adev,
780                                                         golden_settings_polaris11_a11,
781                                                         ARRAY_SIZE(golden_settings_polaris11_a11));
782                 amdgpu_device_program_register_sequence(adev,
783                                                         polaris11_golden_common_all,
784                                                         ARRAY_SIZE(polaris11_golden_common_all));
785                 break;
786         case CHIP_POLARIS10:
787                 amdgpu_device_program_register_sequence(adev,
788                                                         golden_settings_polaris10_a11,
789                                                         ARRAY_SIZE(golden_settings_polaris10_a11));
790                 amdgpu_device_program_register_sequence(adev,
791                                                         polaris10_golden_common_all,
792                                                         ARRAY_SIZE(polaris10_golden_common_all));
793                 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
794                 if (adev->pdev->revision == 0xc7 &&
795                     ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
796                      (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
797                      (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
798                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
799                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
800                 }
801                 break;
802         case CHIP_CARRIZO:
803                 amdgpu_device_program_register_sequence(adev,
804                                                         cz_mgcg_cgcg_init,
805                                                         ARRAY_SIZE(cz_mgcg_cgcg_init));
806                 amdgpu_device_program_register_sequence(adev,
807                                                         cz_golden_settings_a11,
808                                                         ARRAY_SIZE(cz_golden_settings_a11));
809                 amdgpu_device_program_register_sequence(adev,
810                                                         cz_golden_common_all,
811                                                         ARRAY_SIZE(cz_golden_common_all));
812                 break;
813         case CHIP_STONEY:
814                 amdgpu_device_program_register_sequence(adev,
815                                                         stoney_mgcg_cgcg_init,
816                                                         ARRAY_SIZE(stoney_mgcg_cgcg_init));
817                 amdgpu_device_program_register_sequence(adev,
818                                                         stoney_golden_settings_a11,
819                                                         ARRAY_SIZE(stoney_golden_settings_a11));
820                 amdgpu_device_program_register_sequence(adev,
821                                                         stoney_golden_common_all,
822                                                         ARRAY_SIZE(stoney_golden_common_all));
823                 break;
824         default:
825                 break;
826         }
827 }
828
829 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
830 {
831         adev->gfx.scratch.num_reg = 8;
832         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
833         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
834 }
835
836 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
837 {
838         struct amdgpu_device *adev = ring->adev;
839         uint32_t scratch;
840         uint32_t tmp = 0;
841         unsigned i;
842         int r;
843
844         r = amdgpu_gfx_scratch_get(adev, &scratch);
845         if (r)
846                 return r;
847
848         WREG32(scratch, 0xCAFEDEAD);
849         r = amdgpu_ring_alloc(ring, 3);
850         if (r)
851                 goto error_free_scratch;
852
853         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
854         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
855         amdgpu_ring_write(ring, 0xDEADBEEF);
856         amdgpu_ring_commit(ring);
857
858         for (i = 0; i < adev->usec_timeout; i++) {
859                 tmp = RREG32(scratch);
860                 if (tmp == 0xDEADBEEF)
861                         break;
862                 udelay(1);
863         }
864
865         if (i >= adev->usec_timeout)
866                 r = -ETIMEDOUT;
867
868 error_free_scratch:
869         amdgpu_gfx_scratch_free(adev, scratch);
870         return r;
871 }
872
873 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
874 {
875         struct amdgpu_device *adev = ring->adev;
876         struct amdgpu_ib ib;
877         struct dma_fence *f = NULL;
878
879         unsigned int index;
880         uint64_t gpu_addr;
881         uint32_t tmp;
882         long r;
883
884         r = amdgpu_device_wb_get(adev, &index);
885         if (r)
886                 return r;
887
888         gpu_addr = adev->wb.gpu_addr + (index * 4);
889         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
890         memset(&ib, 0, sizeof(ib));
891         r = amdgpu_ib_get(adev, NULL, 16, &ib);
892         if (r)
893                 goto err1;
894
895         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
896         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
897         ib.ptr[2] = lower_32_bits(gpu_addr);
898         ib.ptr[3] = upper_32_bits(gpu_addr);
899         ib.ptr[4] = 0xDEADBEEF;
900         ib.length_dw = 5;
901
902         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
903         if (r)
904                 goto err2;
905
906         r = dma_fence_wait_timeout(f, false, timeout);
907         if (r == 0) {
908                 r = -ETIMEDOUT;
909                 goto err2;
910         } else if (r < 0) {
911                 goto err2;
912         }
913
914         tmp = adev->wb.wb[index];
915         if (tmp == 0xDEADBEEF)
916                 r = 0;
917         else
918                 r = -EINVAL;
919
920 err2:
921         amdgpu_ib_free(adev, &ib, NULL);
922         dma_fence_put(f);
923 err1:
924         amdgpu_device_wb_free(adev, index);
925         return r;
926 }
927
928
929 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
930 {
931         release_firmware(adev->gfx.pfp_fw);
932         adev->gfx.pfp_fw = NULL;
933         release_firmware(adev->gfx.me_fw);
934         adev->gfx.me_fw = NULL;
935         release_firmware(adev->gfx.ce_fw);
936         adev->gfx.ce_fw = NULL;
937         release_firmware(adev->gfx.rlc_fw);
938         adev->gfx.rlc_fw = NULL;
939         release_firmware(adev->gfx.mec_fw);
940         adev->gfx.mec_fw = NULL;
941         if ((adev->asic_type != CHIP_STONEY) &&
942             (adev->asic_type != CHIP_TOPAZ))
943                 release_firmware(adev->gfx.mec2_fw);
944         adev->gfx.mec2_fw = NULL;
945
946         kfree(adev->gfx.rlc.register_list_format);
947 }
948
949 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
950 {
951         const char *chip_name;
952         char fw_name[30];
953         int err;
954         struct amdgpu_firmware_info *info = NULL;
955         const struct common_firmware_header *header = NULL;
956         const struct gfx_firmware_header_v1_0 *cp_hdr;
957         const struct rlc_firmware_header_v2_0 *rlc_hdr;
958         unsigned int *tmp = NULL, i;
959
960         DRM_DEBUG("\n");
961
962         switch (adev->asic_type) {
963         case CHIP_TOPAZ:
964                 chip_name = "topaz";
965                 break;
966         case CHIP_TONGA:
967                 chip_name = "tonga";
968                 break;
969         case CHIP_CARRIZO:
970                 chip_name = "carrizo";
971                 break;
972         case CHIP_FIJI:
973                 chip_name = "fiji";
974                 break;
975         case CHIP_STONEY:
976                 chip_name = "stoney";
977                 break;
978         case CHIP_POLARIS10:
979                 chip_name = "polaris10";
980                 break;
981         case CHIP_POLARIS11:
982                 chip_name = "polaris11";
983                 break;
984         case CHIP_POLARIS12:
985                 chip_name = "polaris12";
986                 break;
987         case CHIP_VEGAM:
988                 chip_name = "vegam";
989                 break;
990         default:
991                 BUG();
992         }
993
994         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
995                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
996                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
997                 if (err == -ENOENT) {
998                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
999                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1000                 }
1001         } else {
1002                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1003                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1004         }
1005         if (err)
1006                 goto out;
1007         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1008         if (err)
1009                 goto out;
1010         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1011         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1012         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1013
1014         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1015                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1016                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1017                 if (err == -ENOENT) {
1018                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1019                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1020                 }
1021         } else {
1022                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1023                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1024         }
1025         if (err)
1026                 goto out;
1027         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1028         if (err)
1029                 goto out;
1030         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1031         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1032
1033         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1034
1035         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1036                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1037                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1038                 if (err == -ENOENT) {
1039                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1040                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1041                 }
1042         } else {
1043                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1044                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1045         }
1046         if (err)
1047                 goto out;
1048         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1049         if (err)
1050                 goto out;
1051         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1052         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1053         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1054
1055         /*
1056          * Support for MCBP/Virtualization in combination with chained IBs is
1057          * formal released on feature version #46
1058          */
1059         if (adev->gfx.ce_feature_version >= 46 &&
1060             adev->gfx.pfp_feature_version >= 46) {
1061                 adev->virt.chained_ib_support = true;
1062                 DRM_INFO("Chained IB support enabled!\n");
1063         } else
1064                 adev->virt.chained_ib_support = false;
1065
1066         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1067         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1068         if (err)
1069                 goto out;
1070         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1071         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1072         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1073         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1074
1075         adev->gfx.rlc.save_and_restore_offset =
1076                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1077         adev->gfx.rlc.clear_state_descriptor_offset =
1078                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1079         adev->gfx.rlc.avail_scratch_ram_locations =
1080                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1081         adev->gfx.rlc.reg_restore_list_size =
1082                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1083         adev->gfx.rlc.reg_list_format_start =
1084                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1085         adev->gfx.rlc.reg_list_format_separate_start =
1086                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1087         adev->gfx.rlc.starting_offsets_start =
1088                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1089         adev->gfx.rlc.reg_list_format_size_bytes =
1090                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1091         adev->gfx.rlc.reg_list_size_bytes =
1092                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1093
1094         adev->gfx.rlc.register_list_format =
1095                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1096                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1097
1098         if (!adev->gfx.rlc.register_list_format) {
1099                 err = -ENOMEM;
1100                 goto out;
1101         }
1102
1103         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1104                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1105         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1106                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1107
1108         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1109
1110         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1111                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1112         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1113                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1114
1115         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1116                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1117                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1118                 if (err == -ENOENT) {
1119                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1120                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1121                 }
1122         } else {
1123                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1124                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1125         }
1126         if (err)
1127                 goto out;
1128         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1129         if (err)
1130                 goto out;
1131         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1132         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1133         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1134
1135         if ((adev->asic_type != CHIP_STONEY) &&
1136             (adev->asic_type != CHIP_TOPAZ)) {
1137                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1138                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1139                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1140                         if (err == -ENOENT) {
1141                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1142                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1143                         }
1144                 } else {
1145                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1146                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1147                 }
1148                 if (!err) {
1149                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1150                         if (err)
1151                                 goto out;
1152                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1153                                 adev->gfx.mec2_fw->data;
1154                         adev->gfx.mec2_fw_version =
1155                                 le32_to_cpu(cp_hdr->header.ucode_version);
1156                         adev->gfx.mec2_feature_version =
1157                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1158                 } else {
1159                         err = 0;
1160                         adev->gfx.mec2_fw = NULL;
1161                 }
1162         }
1163
1164         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1165         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1166         info->fw = adev->gfx.pfp_fw;
1167         header = (const struct common_firmware_header *)info->fw->data;
1168         adev->firmware.fw_size +=
1169                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1170
1171         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1172         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1173         info->fw = adev->gfx.me_fw;
1174         header = (const struct common_firmware_header *)info->fw->data;
1175         adev->firmware.fw_size +=
1176                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1177
1178         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1179         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1180         info->fw = adev->gfx.ce_fw;
1181         header = (const struct common_firmware_header *)info->fw->data;
1182         adev->firmware.fw_size +=
1183                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1184
1185         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1186         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1187         info->fw = adev->gfx.rlc_fw;
1188         header = (const struct common_firmware_header *)info->fw->data;
1189         adev->firmware.fw_size +=
1190                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1191
1192         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1193         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1194         info->fw = adev->gfx.mec_fw;
1195         header = (const struct common_firmware_header *)info->fw->data;
1196         adev->firmware.fw_size +=
1197                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1198
1199         /* we need account JT in */
1200         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1201         adev->firmware.fw_size +=
1202                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1203
1204         if (amdgpu_sriov_vf(adev)) {
1205                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1206                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1207                 info->fw = adev->gfx.mec_fw;
1208                 adev->firmware.fw_size +=
1209                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1210         }
1211
1212         if (adev->gfx.mec2_fw) {
1213                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1214                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1215                 info->fw = adev->gfx.mec2_fw;
1216                 header = (const struct common_firmware_header *)info->fw->data;
1217                 adev->firmware.fw_size +=
1218                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1219         }
1220
1221 out:
1222         if (err) {
1223                 dev_err(adev->dev,
1224                         "gfx8: Failed to load firmware \"%s\"\n",
1225                         fw_name);
1226                 release_firmware(adev->gfx.pfp_fw);
1227                 adev->gfx.pfp_fw = NULL;
1228                 release_firmware(adev->gfx.me_fw);
1229                 adev->gfx.me_fw = NULL;
1230                 release_firmware(adev->gfx.ce_fw);
1231                 adev->gfx.ce_fw = NULL;
1232                 release_firmware(adev->gfx.rlc_fw);
1233                 adev->gfx.rlc_fw = NULL;
1234                 release_firmware(adev->gfx.mec_fw);
1235                 adev->gfx.mec_fw = NULL;
1236                 release_firmware(adev->gfx.mec2_fw);
1237                 adev->gfx.mec2_fw = NULL;
1238         }
1239         return err;
1240 }
1241
1242 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1243                                     volatile u32 *buffer)
1244 {
1245         u32 count = 0, i;
1246         const struct cs_section_def *sect = NULL;
1247         const struct cs_extent_def *ext = NULL;
1248
1249         if (adev->gfx.rlc.cs_data == NULL)
1250                 return;
1251         if (buffer == NULL)
1252                 return;
1253
1254         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1255         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1256
1257         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1258         buffer[count++] = cpu_to_le32(0x80000000);
1259         buffer[count++] = cpu_to_le32(0x80000000);
1260
1261         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1262                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1263                         if (sect->id == SECT_CONTEXT) {
1264                                 buffer[count++] =
1265                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1266                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1267                                                 PACKET3_SET_CONTEXT_REG_START);
1268                                 for (i = 0; i < ext->reg_count; i++)
1269                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1270                         } else {
1271                                 return;
1272                         }
1273                 }
1274         }
1275
1276         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1277         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1278                         PACKET3_SET_CONTEXT_REG_START);
1279         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1280         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1281
1282         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1283         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1284
1285         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1286         buffer[count++] = cpu_to_le32(0);
1287 }
1288
1289 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1290 {
1291         if (adev->asic_type == CHIP_CARRIZO)
1292                 return 5;
1293         else
1294                 return 4;
1295 }
1296
1297 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1298 {
1299         const struct cs_section_def *cs_data;
1300         int r;
1301
1302         adev->gfx.rlc.cs_data = vi_cs_data;
1303
1304         cs_data = adev->gfx.rlc.cs_data;
1305
1306         if (cs_data) {
1307                 /* init clear state block */
1308                 r = amdgpu_gfx_rlc_init_csb(adev);
1309                 if (r)
1310                         return r;
1311         }
1312
1313         if ((adev->asic_type == CHIP_CARRIZO) ||
1314             (adev->asic_type == CHIP_STONEY)) {
1315                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1316                 r = amdgpu_gfx_rlc_init_cpt(adev);
1317                 if (r)
1318                         return r;
1319         }
1320
1321         return 0;
1322 }
1323
1324 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1325 {
1326         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1327 }
1328
1329 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1330 {
1331         int r;
1332         u32 *hpd;
1333         size_t mec_hpd_size;
1334
1335         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1336
1337         /* take ownership of the relevant compute queues */
1338         amdgpu_gfx_compute_queue_acquire(adev);
1339
1340         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1341
1342         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1343                                       AMDGPU_GEM_DOMAIN_VRAM,
1344                                       &adev->gfx.mec.hpd_eop_obj,
1345                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1346                                       (void **)&hpd);
1347         if (r) {
1348                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1349                 return r;
1350         }
1351
1352         memset(hpd, 0, mec_hpd_size);
1353
1354         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1355         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1356
1357         return 0;
1358 }
1359
1360 static const u32 vgpr_init_compute_shader[] =
1361 {
1362         0x7e000209, 0x7e020208,
1363         0x7e040207, 0x7e060206,
1364         0x7e080205, 0x7e0a0204,
1365         0x7e0c0203, 0x7e0e0202,
1366         0x7e100201, 0x7e120200,
1367         0x7e140209, 0x7e160208,
1368         0x7e180207, 0x7e1a0206,
1369         0x7e1c0205, 0x7e1e0204,
1370         0x7e200203, 0x7e220202,
1371         0x7e240201, 0x7e260200,
1372         0x7e280209, 0x7e2a0208,
1373         0x7e2c0207, 0x7e2e0206,
1374         0x7e300205, 0x7e320204,
1375         0x7e340203, 0x7e360202,
1376         0x7e380201, 0x7e3a0200,
1377         0x7e3c0209, 0x7e3e0208,
1378         0x7e400207, 0x7e420206,
1379         0x7e440205, 0x7e460204,
1380         0x7e480203, 0x7e4a0202,
1381         0x7e4c0201, 0x7e4e0200,
1382         0x7e500209, 0x7e520208,
1383         0x7e540207, 0x7e560206,
1384         0x7e580205, 0x7e5a0204,
1385         0x7e5c0203, 0x7e5e0202,
1386         0x7e600201, 0x7e620200,
1387         0x7e640209, 0x7e660208,
1388         0x7e680207, 0x7e6a0206,
1389         0x7e6c0205, 0x7e6e0204,
1390         0x7e700203, 0x7e720202,
1391         0x7e740201, 0x7e760200,
1392         0x7e780209, 0x7e7a0208,
1393         0x7e7c0207, 0x7e7e0206,
1394         0xbf8a0000, 0xbf810000,
1395 };
1396
1397 static const u32 sgpr_init_compute_shader[] =
1398 {
1399         0xbe8a0100, 0xbe8c0102,
1400         0xbe8e0104, 0xbe900106,
1401         0xbe920108, 0xbe940100,
1402         0xbe960102, 0xbe980104,
1403         0xbe9a0106, 0xbe9c0108,
1404         0xbe9e0100, 0xbea00102,
1405         0xbea20104, 0xbea40106,
1406         0xbea60108, 0xbea80100,
1407         0xbeaa0102, 0xbeac0104,
1408         0xbeae0106, 0xbeb00108,
1409         0xbeb20100, 0xbeb40102,
1410         0xbeb60104, 0xbeb80106,
1411         0xbeba0108, 0xbebc0100,
1412         0xbebe0102, 0xbec00104,
1413         0xbec20106, 0xbec40108,
1414         0xbec60100, 0xbec80102,
1415         0xbee60004, 0xbee70005,
1416         0xbeea0006, 0xbeeb0007,
1417         0xbee80008, 0xbee90009,
1418         0xbefc0000, 0xbf8a0000,
1419         0xbf810000, 0x00000000,
1420 };
1421
1422 static const u32 vgpr_init_regs[] =
1423 {
1424         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1425         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1426         mmCOMPUTE_NUM_THREAD_X, 256*4,
1427         mmCOMPUTE_NUM_THREAD_Y, 1,
1428         mmCOMPUTE_NUM_THREAD_Z, 1,
1429         mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1430         mmCOMPUTE_PGM_RSRC2, 20,
1431         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1432         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1433         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1434         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1435         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1436         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1437         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1438         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1439         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1440         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1441 };
1442
1443 static const u32 sgpr1_init_regs[] =
1444 {
1445         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1446         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1447         mmCOMPUTE_NUM_THREAD_X, 256*5,
1448         mmCOMPUTE_NUM_THREAD_Y, 1,
1449         mmCOMPUTE_NUM_THREAD_Z, 1,
1450         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1451         mmCOMPUTE_PGM_RSRC2, 20,
1452         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1453         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1454         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1455         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1456         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1457         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1458         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1459         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1460         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1461         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1462 };
1463
1464 static const u32 sgpr2_init_regs[] =
1465 {
1466         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1467         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1468         mmCOMPUTE_NUM_THREAD_X, 256*5,
1469         mmCOMPUTE_NUM_THREAD_Y, 1,
1470         mmCOMPUTE_NUM_THREAD_Z, 1,
1471         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1472         mmCOMPUTE_PGM_RSRC2, 20,
1473         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1474         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1475         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1476         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1477         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1478         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1479         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1480         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1481         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1482         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1483 };
1484
1485 static const u32 sec_ded_counter_registers[] =
1486 {
1487         mmCPC_EDC_ATC_CNT,
1488         mmCPC_EDC_SCRATCH_CNT,
1489         mmCPC_EDC_UCODE_CNT,
1490         mmCPF_EDC_ATC_CNT,
1491         mmCPF_EDC_ROQ_CNT,
1492         mmCPF_EDC_TAG_CNT,
1493         mmCPG_EDC_ATC_CNT,
1494         mmCPG_EDC_DMA_CNT,
1495         mmCPG_EDC_TAG_CNT,
1496         mmDC_EDC_CSINVOC_CNT,
1497         mmDC_EDC_RESTORE_CNT,
1498         mmDC_EDC_STATE_CNT,
1499         mmGDS_EDC_CNT,
1500         mmGDS_EDC_GRBM_CNT,
1501         mmGDS_EDC_OA_DED,
1502         mmSPI_EDC_CNT,
1503         mmSQC_ATC_EDC_GATCL1_CNT,
1504         mmSQC_EDC_CNT,
1505         mmSQ_EDC_DED_CNT,
1506         mmSQ_EDC_INFO,
1507         mmSQ_EDC_SEC_CNT,
1508         mmTCC_EDC_CNT,
1509         mmTCP_ATC_EDC_GATCL1_CNT,
1510         mmTCP_EDC_CNT,
1511         mmTD_EDC_CNT
1512 };
1513
1514 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1515 {
1516         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1517         struct amdgpu_ib ib;
1518         struct dma_fence *f = NULL;
1519         int r, i;
1520         u32 tmp;
1521         unsigned total_size, vgpr_offset, sgpr_offset;
1522         u64 gpu_addr;
1523
1524         /* only supported on CZ */
1525         if (adev->asic_type != CHIP_CARRIZO)
1526                 return 0;
1527
1528         /* bail if the compute ring is not ready */
1529         if (!ring->sched.ready)
1530                 return 0;
1531
1532         tmp = RREG32(mmGB_EDC_MODE);
1533         WREG32(mmGB_EDC_MODE, 0);
1534
1535         total_size =
1536                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1537         total_size +=
1538                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1539         total_size +=
1540                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1541         total_size = ALIGN(total_size, 256);
1542         vgpr_offset = total_size;
1543         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1544         sgpr_offset = total_size;
1545         total_size += sizeof(sgpr_init_compute_shader);
1546
1547         /* allocate an indirect buffer to put the commands in */
1548         memset(&ib, 0, sizeof(ib));
1549         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1550         if (r) {
1551                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1552                 return r;
1553         }
1554
1555         /* load the compute shaders */
1556         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1557                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1558
1559         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1560                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1561
1562         /* init the ib length to 0 */
1563         ib.length_dw = 0;
1564
1565         /* VGPR */
1566         /* write the register state for the compute dispatch */
1567         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1568                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1569                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1570                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1571         }
1572         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1573         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1574         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1575         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1576         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1577         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1578
1579         /* write dispatch packet */
1580         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1581         ib.ptr[ib.length_dw++] = 8; /* x */
1582         ib.ptr[ib.length_dw++] = 1; /* y */
1583         ib.ptr[ib.length_dw++] = 1; /* z */
1584         ib.ptr[ib.length_dw++] =
1585                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1586
1587         /* write CS partial flush packet */
1588         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1589         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1590
1591         /* SGPR1 */
1592         /* write the register state for the compute dispatch */
1593         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1594                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1595                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1596                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1597         }
1598         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1599         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1600         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1601         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1602         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1603         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1604
1605         /* write dispatch packet */
1606         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1607         ib.ptr[ib.length_dw++] = 8; /* x */
1608         ib.ptr[ib.length_dw++] = 1; /* y */
1609         ib.ptr[ib.length_dw++] = 1; /* z */
1610         ib.ptr[ib.length_dw++] =
1611                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1612
1613         /* write CS partial flush packet */
1614         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1615         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1616
1617         /* SGPR2 */
1618         /* write the register state for the compute dispatch */
1619         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1620                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1621                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1622                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1623         }
1624         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1625         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1626         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1627         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1628         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1629         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1630
1631         /* write dispatch packet */
1632         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1633         ib.ptr[ib.length_dw++] = 8; /* x */
1634         ib.ptr[ib.length_dw++] = 1; /* y */
1635         ib.ptr[ib.length_dw++] = 1; /* z */
1636         ib.ptr[ib.length_dw++] =
1637                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1638
1639         /* write CS partial flush packet */
1640         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1641         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1642
1643         /* shedule the ib on the ring */
1644         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1645         if (r) {
1646                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1647                 goto fail;
1648         }
1649
1650         /* wait for the GPU to finish processing the IB */
1651         r = dma_fence_wait(f, false);
1652         if (r) {
1653                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1654                 goto fail;
1655         }
1656
1657         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1658         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1659         WREG32(mmGB_EDC_MODE, tmp);
1660
1661         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1662         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1663         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1664
1665
1666         /* read back registers to clear the counters */
1667         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1668                 RREG32(sec_ded_counter_registers[i]);
1669
1670 fail:
1671         amdgpu_ib_free(adev, &ib, NULL);
1672         dma_fence_put(f);
1673
1674         return r;
1675 }
1676
1677 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1678 {
1679         u32 gb_addr_config;
1680         u32 mc_arb_ramcfg;
1681         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1682         u32 tmp;
1683         int ret;
1684
1685         switch (adev->asic_type) {
1686         case CHIP_TOPAZ:
1687                 adev->gfx.config.max_shader_engines = 1;
1688                 adev->gfx.config.max_tile_pipes = 2;
1689                 adev->gfx.config.max_cu_per_sh = 6;
1690                 adev->gfx.config.max_sh_per_se = 1;
1691                 adev->gfx.config.max_backends_per_se = 2;
1692                 adev->gfx.config.max_texture_channel_caches = 2;
1693                 adev->gfx.config.max_gprs = 256;
1694                 adev->gfx.config.max_gs_threads = 32;
1695                 adev->gfx.config.max_hw_contexts = 8;
1696
1697                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1698                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1699                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1700                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1701                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1702                 break;
1703         case CHIP_FIJI:
1704                 adev->gfx.config.max_shader_engines = 4;
1705                 adev->gfx.config.max_tile_pipes = 16;
1706                 adev->gfx.config.max_cu_per_sh = 16;
1707                 adev->gfx.config.max_sh_per_se = 1;
1708                 adev->gfx.config.max_backends_per_se = 4;
1709                 adev->gfx.config.max_texture_channel_caches = 16;
1710                 adev->gfx.config.max_gprs = 256;
1711                 adev->gfx.config.max_gs_threads = 32;
1712                 adev->gfx.config.max_hw_contexts = 8;
1713
1714                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1715                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1716                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1717                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1718                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1719                 break;
1720         case CHIP_POLARIS11:
1721         case CHIP_POLARIS12:
1722                 ret = amdgpu_atombios_get_gfx_info(adev);
1723                 if (ret)
1724                         return ret;
1725                 adev->gfx.config.max_gprs = 256;
1726                 adev->gfx.config.max_gs_threads = 32;
1727                 adev->gfx.config.max_hw_contexts = 8;
1728
1729                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1730                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1731                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1732                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1733                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1734                 break;
1735         case CHIP_POLARIS10:
1736         case CHIP_VEGAM:
1737                 ret = amdgpu_atombios_get_gfx_info(adev);
1738                 if (ret)
1739                         return ret;
1740                 adev->gfx.config.max_gprs = 256;
1741                 adev->gfx.config.max_gs_threads = 32;
1742                 adev->gfx.config.max_hw_contexts = 8;
1743
1744                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1745                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1746                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1747                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1748                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1749                 break;
1750         case CHIP_TONGA:
1751                 adev->gfx.config.max_shader_engines = 4;
1752                 adev->gfx.config.max_tile_pipes = 8;
1753                 adev->gfx.config.max_cu_per_sh = 8;
1754                 adev->gfx.config.max_sh_per_se = 1;
1755                 adev->gfx.config.max_backends_per_se = 2;
1756                 adev->gfx.config.max_texture_channel_caches = 8;
1757                 adev->gfx.config.max_gprs = 256;
1758                 adev->gfx.config.max_gs_threads = 32;
1759                 adev->gfx.config.max_hw_contexts = 8;
1760
1761                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1762                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1763                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1764                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1765                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1766                 break;
1767         case CHIP_CARRIZO:
1768                 adev->gfx.config.max_shader_engines = 1;
1769                 adev->gfx.config.max_tile_pipes = 2;
1770                 adev->gfx.config.max_sh_per_se = 1;
1771                 adev->gfx.config.max_backends_per_se = 2;
1772                 adev->gfx.config.max_cu_per_sh = 8;
1773                 adev->gfx.config.max_texture_channel_caches = 2;
1774                 adev->gfx.config.max_gprs = 256;
1775                 adev->gfx.config.max_gs_threads = 32;
1776                 adev->gfx.config.max_hw_contexts = 8;
1777
1778                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1779                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1780                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1781                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1782                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1783                 break;
1784         case CHIP_STONEY:
1785                 adev->gfx.config.max_shader_engines = 1;
1786                 adev->gfx.config.max_tile_pipes = 2;
1787                 adev->gfx.config.max_sh_per_se = 1;
1788                 adev->gfx.config.max_backends_per_se = 1;
1789                 adev->gfx.config.max_cu_per_sh = 3;
1790                 adev->gfx.config.max_texture_channel_caches = 2;
1791                 adev->gfx.config.max_gprs = 256;
1792                 adev->gfx.config.max_gs_threads = 16;
1793                 adev->gfx.config.max_hw_contexts = 8;
1794
1795                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1796                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1797                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1798                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1799                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1800                 break;
1801         default:
1802                 adev->gfx.config.max_shader_engines = 2;
1803                 adev->gfx.config.max_tile_pipes = 4;
1804                 adev->gfx.config.max_cu_per_sh = 2;
1805                 adev->gfx.config.max_sh_per_se = 1;
1806                 adev->gfx.config.max_backends_per_se = 2;
1807                 adev->gfx.config.max_texture_channel_caches = 4;
1808                 adev->gfx.config.max_gprs = 256;
1809                 adev->gfx.config.max_gs_threads = 32;
1810                 adev->gfx.config.max_hw_contexts = 8;
1811
1812                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1813                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1814                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1815                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1816                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1817                 break;
1818         }
1819
1820         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1821         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1822
1823         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1824         adev->gfx.config.mem_max_burst_length_bytes = 256;
1825         if (adev->flags & AMD_IS_APU) {
1826                 /* Get memory bank mapping mode. */
1827                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1828                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1829                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1830
1831                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1832                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1833                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1834
1835                 /* Validate settings in case only one DIMM installed. */
1836                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1837                         dimm00_addr_map = 0;
1838                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1839                         dimm01_addr_map = 0;
1840                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1841                         dimm10_addr_map = 0;
1842                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1843                         dimm11_addr_map = 0;
1844
1845                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1846                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1847                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1848                         adev->gfx.config.mem_row_size_in_kb = 2;
1849                 else
1850                         adev->gfx.config.mem_row_size_in_kb = 1;
1851         } else {
1852                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1853                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1854                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1855                         adev->gfx.config.mem_row_size_in_kb = 4;
1856         }
1857
1858         adev->gfx.config.shader_engine_tile_size = 32;
1859         adev->gfx.config.num_gpus = 1;
1860         adev->gfx.config.multi_gpu_tile_size = 64;
1861
1862         /* fix up row size */
1863         switch (adev->gfx.config.mem_row_size_in_kb) {
1864         case 1:
1865         default:
1866                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1867                 break;
1868         case 2:
1869                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1870                 break;
1871         case 4:
1872                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1873                 break;
1874         }
1875         adev->gfx.config.gb_addr_config = gb_addr_config;
1876
1877         return 0;
1878 }
1879
1880 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1881                                         int mec, int pipe, int queue)
1882 {
1883         int r;
1884         unsigned irq_type;
1885         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1886
1887         ring = &adev->gfx.compute_ring[ring_id];
1888
1889         /* mec0 is me1 */
1890         ring->me = mec + 1;
1891         ring->pipe = pipe;
1892         ring->queue = queue;
1893
1894         ring->ring_obj = NULL;
1895         ring->use_doorbell = true;
1896         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1897         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1898                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1899         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1900
1901         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1902                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1903                 + ring->pipe;
1904
1905         /* type-2 packets are deprecated on MEC, use type-3 instead */
1906         r = amdgpu_ring_init(adev, ring, 1024,
1907                         &adev->gfx.eop_irq, irq_type);
1908         if (r)
1909                 return r;
1910
1911
1912         return 0;
1913 }
1914
1915 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1916
1917 static int gfx_v8_0_sw_init(void *handle)
1918 {
1919         int i, j, k, r, ring_id;
1920         struct amdgpu_ring *ring;
1921         struct amdgpu_kiq *kiq;
1922         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1923
1924         switch (adev->asic_type) {
1925         case CHIP_TONGA:
1926         case CHIP_CARRIZO:
1927         case CHIP_FIJI:
1928         case CHIP_POLARIS10:
1929         case CHIP_POLARIS11:
1930         case CHIP_POLARIS12:
1931         case CHIP_VEGAM:
1932                 adev->gfx.mec.num_mec = 2;
1933                 break;
1934         case CHIP_TOPAZ:
1935         case CHIP_STONEY:
1936         default:
1937                 adev->gfx.mec.num_mec = 1;
1938                 break;
1939         }
1940
1941         adev->gfx.mec.num_pipe_per_mec = 4;
1942         adev->gfx.mec.num_queue_per_pipe = 8;
1943
1944         /* EOP Event */
1945         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1946         if (r)
1947                 return r;
1948
1949         /* Privileged reg */
1950         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1951                               &adev->gfx.priv_reg_irq);
1952         if (r)
1953                 return r;
1954
1955         /* Privileged inst */
1956         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1957                               &adev->gfx.priv_inst_irq);
1958         if (r)
1959                 return r;
1960
1961         /* Add CP EDC/ECC irq  */
1962         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1963                               &adev->gfx.cp_ecc_error_irq);
1964         if (r)
1965                 return r;
1966
1967         /* SQ interrupts. */
1968         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1969                               &adev->gfx.sq_irq);
1970         if (r) {
1971                 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1972                 return r;
1973         }
1974
1975         INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1976
1977         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1978
1979         gfx_v8_0_scratch_init(adev);
1980
1981         r = gfx_v8_0_init_microcode(adev);
1982         if (r) {
1983                 DRM_ERROR("Failed to load gfx firmware!\n");
1984                 return r;
1985         }
1986
1987         r = adev->gfx.rlc.funcs->init(adev);
1988         if (r) {
1989                 DRM_ERROR("Failed to init rlc BOs!\n");
1990                 return r;
1991         }
1992
1993         r = gfx_v8_0_mec_init(adev);
1994         if (r) {
1995                 DRM_ERROR("Failed to init MEC BOs!\n");
1996                 return r;
1997         }
1998
1999         /* set up the gfx ring */
2000         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2001                 ring = &adev->gfx.gfx_ring[i];
2002                 ring->ring_obj = NULL;
2003                 sprintf(ring->name, "gfx");
2004                 /* no gfx doorbells on iceland */
2005                 if (adev->asic_type != CHIP_TOPAZ) {
2006                         ring->use_doorbell = true;
2007                         ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2008                 }
2009
2010                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2011                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2012                 if (r)
2013                         return r;
2014         }
2015
2016
2017         /* set up the compute queues - allocate horizontally across pipes */
2018         ring_id = 0;
2019         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2020                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2021                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2022                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2023                                         continue;
2024
2025                                 r = gfx_v8_0_compute_ring_init(adev,
2026                                                                 ring_id,
2027                                                                 i, k, j);
2028                                 if (r)
2029                                         return r;
2030
2031                                 ring_id++;
2032                         }
2033                 }
2034         }
2035
2036         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2037         if (r) {
2038                 DRM_ERROR("Failed to init KIQ BOs!\n");
2039                 return r;
2040         }
2041
2042         kiq = &adev->gfx.kiq;
2043         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2044         if (r)
2045                 return r;
2046
2047         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2048         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2049         if (r)
2050                 return r;
2051
2052         adev->gfx.ce_ram_size = 0x8000;
2053
2054         r = gfx_v8_0_gpu_early_init(adev);
2055         if (r)
2056                 return r;
2057
2058         return 0;
2059 }
2060
2061 static int gfx_v8_0_sw_fini(void *handle)
2062 {
2063         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2064         int i;
2065
2066         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2067                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2068         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2069                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2070
2071         amdgpu_gfx_mqd_sw_fini(adev);
2072         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2073         amdgpu_gfx_kiq_fini(adev);
2074
2075         gfx_v8_0_mec_fini(adev);
2076         amdgpu_gfx_rlc_fini(adev);
2077         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2078                                 &adev->gfx.rlc.clear_state_gpu_addr,
2079                                 (void **)&adev->gfx.rlc.cs_ptr);
2080         if ((adev->asic_type == CHIP_CARRIZO) ||
2081             (adev->asic_type == CHIP_STONEY)) {
2082                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2083                                 &adev->gfx.rlc.cp_table_gpu_addr,
2084                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2085         }
2086         gfx_v8_0_free_microcode(adev);
2087
2088         return 0;
2089 }
2090
2091 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2092 {
2093         uint32_t *modearray, *mod2array;
2094         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2095         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2096         u32 reg_offset;
2097
2098         modearray = adev->gfx.config.tile_mode_array;
2099         mod2array = adev->gfx.config.macrotile_mode_array;
2100
2101         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2102                 modearray[reg_offset] = 0;
2103
2104         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2105                 mod2array[reg_offset] = 0;
2106
2107         switch (adev->asic_type) {
2108         case CHIP_TOPAZ:
2109                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2110                                 PIPE_CONFIG(ADDR_SURF_P2) |
2111                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2112                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2113                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2114                                 PIPE_CONFIG(ADDR_SURF_P2) |
2115                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2116                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2117                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2118                                 PIPE_CONFIG(ADDR_SURF_P2) |
2119                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2120                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2121                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2122                                 PIPE_CONFIG(ADDR_SURF_P2) |
2123                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2124                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2125                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2126                                 PIPE_CONFIG(ADDR_SURF_P2) |
2127                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2128                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2129                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2130                                 PIPE_CONFIG(ADDR_SURF_P2) |
2131                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2132                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2133                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2134                                 PIPE_CONFIG(ADDR_SURF_P2) |
2135                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2136                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2137                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2138                                 PIPE_CONFIG(ADDR_SURF_P2));
2139                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2140                                 PIPE_CONFIG(ADDR_SURF_P2) |
2141                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2142                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2143                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2144                                  PIPE_CONFIG(ADDR_SURF_P2) |
2145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2147                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2148                                  PIPE_CONFIG(ADDR_SURF_P2) |
2149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2151                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2152                                  PIPE_CONFIG(ADDR_SURF_P2) |
2153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2155                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2156                                  PIPE_CONFIG(ADDR_SURF_P2) |
2157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2159                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2160                                  PIPE_CONFIG(ADDR_SURF_P2) |
2161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2163                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2164                                  PIPE_CONFIG(ADDR_SURF_P2) |
2165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2167                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2168                                  PIPE_CONFIG(ADDR_SURF_P2) |
2169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2171                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2172                                  PIPE_CONFIG(ADDR_SURF_P2) |
2173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2175                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2176                                  PIPE_CONFIG(ADDR_SURF_P2) |
2177                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2178                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2179                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2180                                  PIPE_CONFIG(ADDR_SURF_P2) |
2181                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2182                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2183                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2184                                  PIPE_CONFIG(ADDR_SURF_P2) |
2185                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2186                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2187                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2188                                  PIPE_CONFIG(ADDR_SURF_P2) |
2189                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2190                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2191                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2192                                  PIPE_CONFIG(ADDR_SURF_P2) |
2193                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2194                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2195                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2196                                  PIPE_CONFIG(ADDR_SURF_P2) |
2197                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2198                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2199                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2200                                  PIPE_CONFIG(ADDR_SURF_P2) |
2201                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2202                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2203                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2204                                  PIPE_CONFIG(ADDR_SURF_P2) |
2205                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2206                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2207                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2208                                  PIPE_CONFIG(ADDR_SURF_P2) |
2209                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2210                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2211
2212                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2213                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2214                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2215                                 NUM_BANKS(ADDR_SURF_8_BANK));
2216                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2217                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2218                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2219                                 NUM_BANKS(ADDR_SURF_8_BANK));
2220                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2221                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2222                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2223                                 NUM_BANKS(ADDR_SURF_8_BANK));
2224                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2225                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2226                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2227                                 NUM_BANKS(ADDR_SURF_8_BANK));
2228                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2229                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2230                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2231                                 NUM_BANKS(ADDR_SURF_8_BANK));
2232                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2233                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2234                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2235                                 NUM_BANKS(ADDR_SURF_8_BANK));
2236                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2237                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2238                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2239                                 NUM_BANKS(ADDR_SURF_8_BANK));
2240                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2241                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2242                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2243                                 NUM_BANKS(ADDR_SURF_16_BANK));
2244                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2245                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2246                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2247                                 NUM_BANKS(ADDR_SURF_16_BANK));
2248                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2249                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2250                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2251                                  NUM_BANKS(ADDR_SURF_16_BANK));
2252                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2253                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2254                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2255                                  NUM_BANKS(ADDR_SURF_16_BANK));
2256                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2257                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2258                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2259                                  NUM_BANKS(ADDR_SURF_16_BANK));
2260                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2261                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2262                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2263                                  NUM_BANKS(ADDR_SURF_16_BANK));
2264                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2266                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2267                                  NUM_BANKS(ADDR_SURF_8_BANK));
2268
2269                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2270                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2271                             reg_offset != 23)
2272                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2273
2274                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2275                         if (reg_offset != 7)
2276                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2277
2278                 break;
2279         case CHIP_FIJI:
2280         case CHIP_VEGAM:
2281                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2282                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2283                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2284                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2285                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2286                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2287                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2288                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2289                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2291                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2292                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2293                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2294                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2295                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2296                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2297                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2298                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2299                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2300                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2301                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2302                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2303                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2304                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2305                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2306                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2307                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2308                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2309                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2310                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2311                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2312                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2313                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2314                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2315                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2316                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2318                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2319                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2323                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2324                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2327                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2328                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2331                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2332                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2335                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2339                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2340                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2343                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2344                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2347                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2348                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2351                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2352                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2355                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2356                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2359                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2360                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2363                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2364                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2367                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2368                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2370                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2371                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2372                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2373                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2374                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2375                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2376                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2378                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2379                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2380                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2382                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2383                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2384                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2386                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2387                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2388                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2390                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2391                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2392                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2394                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2395                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2396                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2398                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2399                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2400                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2401                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2402                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2403
2404                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2406                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2407                                 NUM_BANKS(ADDR_SURF_8_BANK));
2408                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2410                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2411                                 NUM_BANKS(ADDR_SURF_8_BANK));
2412                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2414                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2415                                 NUM_BANKS(ADDR_SURF_8_BANK));
2416                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2418                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2419                                 NUM_BANKS(ADDR_SURF_8_BANK));
2420                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2421                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2422                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2423                                 NUM_BANKS(ADDR_SURF_8_BANK));
2424                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2426                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2427                                 NUM_BANKS(ADDR_SURF_8_BANK));
2428                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2430                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2431                                 NUM_BANKS(ADDR_SURF_8_BANK));
2432                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2433                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2434                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2435                                 NUM_BANKS(ADDR_SURF_8_BANK));
2436                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2438                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2439                                 NUM_BANKS(ADDR_SURF_8_BANK));
2440                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2442                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2443                                  NUM_BANKS(ADDR_SURF_8_BANK));
2444                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2446                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2447                                  NUM_BANKS(ADDR_SURF_8_BANK));
2448                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2450                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2451                                  NUM_BANKS(ADDR_SURF_8_BANK));
2452                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2455                                  NUM_BANKS(ADDR_SURF_8_BANK));
2456                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459                                  NUM_BANKS(ADDR_SURF_4_BANK));
2460
2461                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2462                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2463
2464                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2465                         if (reg_offset != 7)
2466                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2467
2468                 break;
2469         case CHIP_TONGA:
2470                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2471                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2472                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2473                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2474                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2475                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2476                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2477                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2478                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2479                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2480                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2481                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2482                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2483                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2484                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2485                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2486                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2487                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2488                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2489                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2490                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2491                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2492                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2493                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2494                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2495                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2496                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2497                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2498                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2500                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2501                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2502                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2503                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2504                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2505                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2506                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2507                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2508                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2510                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2511                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2512                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2513                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2515                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2516                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2517                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2518                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2519                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2520                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2521                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2523                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2524                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2527                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2528                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2529                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2531                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2532                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2533                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2535                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2536                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2537                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2538                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2539                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2540                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2541                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2542                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2543                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2544                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2545                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2547                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2548                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2549                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2551                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2552                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2553                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2555                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2556                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2557                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2559                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2560                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2561                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2562                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2563                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2564                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2565                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2567                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2568                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2569                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2570                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2571                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2572                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2573                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2575                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2576                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2579                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2580                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2581                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2583                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2584                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2585                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2587                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2588                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2589                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2590                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2591                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2592
2593                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2595                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2596                                 NUM_BANKS(ADDR_SURF_16_BANK));
2597                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2599                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2600                                 NUM_BANKS(ADDR_SURF_16_BANK));
2601                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2603                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2604                                 NUM_BANKS(ADDR_SURF_16_BANK));
2605                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2607                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2608                                 NUM_BANKS(ADDR_SURF_16_BANK));
2609                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2611                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2612                                 NUM_BANKS(ADDR_SURF_16_BANK));
2613                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2614                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2615                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2616                                 NUM_BANKS(ADDR_SURF_16_BANK));
2617                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2618                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2619                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2620                                 NUM_BANKS(ADDR_SURF_16_BANK));
2621                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2622                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2623                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2624                                 NUM_BANKS(ADDR_SURF_16_BANK));
2625                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2626                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2627                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2628                                 NUM_BANKS(ADDR_SURF_16_BANK));
2629                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2631                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2632                                  NUM_BANKS(ADDR_SURF_16_BANK));
2633                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2635                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2636                                  NUM_BANKS(ADDR_SURF_16_BANK));
2637                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2639                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2640                                  NUM_BANKS(ADDR_SURF_8_BANK));
2641                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2642                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2643                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2644                                  NUM_BANKS(ADDR_SURF_4_BANK));
2645                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2646                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2647                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2648                                  NUM_BANKS(ADDR_SURF_4_BANK));
2649
2650                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2651                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2652
2653                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2654                         if (reg_offset != 7)
2655                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2656
2657                 break;
2658         case CHIP_POLARIS11:
2659         case CHIP_POLARIS12:
2660                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2662                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2663                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2664                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2666                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2667                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2668                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2671                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2672                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2674                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2675                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2676                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2678                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2679                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2680                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2681                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2682                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2683                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2684                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2685                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2686                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2687                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2688                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2689                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2690                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2691                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2692                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2693                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2694                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2695                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2697                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2698                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2701                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2702                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2703                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2705                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2706                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2707                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2709                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2710                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2711                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2713                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2714                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2715                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2717                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2718                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2719                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2721                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2723                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2725                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2726                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2730                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2731                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2733                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2734                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2735                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2736                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2737                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2738                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2739                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2740                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2741                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2742                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2743                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2744                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2745                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2746                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2747                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2748                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2749                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2750                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2751                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2753                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2754                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2755                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2756                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2757                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2758                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2759                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2760                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2761                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2762                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2763                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2765                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2766                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2767                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2768                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2769                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2770                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2771                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2772                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2773                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2774                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2775                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2777                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2778                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2779                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2780                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2781                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2782
2783                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2785                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2786                                 NUM_BANKS(ADDR_SURF_16_BANK));
2787
2788                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2789                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2790                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2791                                 NUM_BANKS(ADDR_SURF_16_BANK));
2792
2793                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2794                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2795                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2796                                 NUM_BANKS(ADDR_SURF_16_BANK));
2797
2798                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2800                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2801                                 NUM_BANKS(ADDR_SURF_16_BANK));
2802
2803                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2804                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2805                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2806                                 NUM_BANKS(ADDR_SURF_16_BANK));
2807
2808                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2810                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2811                                 NUM_BANKS(ADDR_SURF_16_BANK));
2812
2813                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2815                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2816                                 NUM_BANKS(ADDR_SURF_16_BANK));
2817
2818                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2819                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2820                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2821                                 NUM_BANKS(ADDR_SURF_16_BANK));
2822
2823                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2824                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2825                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2826                                 NUM_BANKS(ADDR_SURF_16_BANK));
2827
2828                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2830                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2831                                 NUM_BANKS(ADDR_SURF_16_BANK));
2832
2833                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2835                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2836                                 NUM_BANKS(ADDR_SURF_16_BANK));
2837
2838                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2840                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2841                                 NUM_BANKS(ADDR_SURF_16_BANK));
2842
2843                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2845                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2846                                 NUM_BANKS(ADDR_SURF_8_BANK));
2847
2848                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2851                                 NUM_BANKS(ADDR_SURF_4_BANK));
2852
2853                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2854                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2855
2856                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2857                         if (reg_offset != 7)
2858                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2859
2860                 break;
2861         case CHIP_POLARIS10:
2862                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2863                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2864                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2865                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2866                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2867                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2868                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2869                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2870                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2871                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2872                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2873                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2874                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2875                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2876                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2877                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2878                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2879                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2880                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2881                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2882                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2883                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2884                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2885                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2886                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2887                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2888                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2889                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2890                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2891                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2892                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2893                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2894                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2895                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2896                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2897                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2899                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2900                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2903                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2904                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2905                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2907                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2908                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2909                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2911                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2912                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2913                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2915                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2919                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2920                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2921                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2923                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2924                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2925                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2926                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2927                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2928                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2929                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2930                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2931                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2932                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2933                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2934                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2935                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2936                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2937                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2938                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2939                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2940                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2941                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2942                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2943                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2944                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2945                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2946                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2947                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2948                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2949                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2950                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2951                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2952                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2953                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2954                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2955                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2956                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2957                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2958                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2959                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2960                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2961                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2962                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2963                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2964                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2965                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2966                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2967                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2968                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2969                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2970                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2971                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2972                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2973                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2974                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2975                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2976                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2977                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2978                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2979                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2980                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2981                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2982                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2983                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2984
2985                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2986                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2987                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2988                                 NUM_BANKS(ADDR_SURF_16_BANK));
2989
2990                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2991                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2992                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2993                                 NUM_BANKS(ADDR_SURF_16_BANK));
2994
2995                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2997                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998                                 NUM_BANKS(ADDR_SURF_16_BANK));
2999
3000                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3002                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003                                 NUM_BANKS(ADDR_SURF_16_BANK));
3004
3005                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3006                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3007                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3008                                 NUM_BANKS(ADDR_SURF_16_BANK));
3009
3010                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3011                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3012                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3013                                 NUM_BANKS(ADDR_SURF_16_BANK));
3014
3015                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3017                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3018                                 NUM_BANKS(ADDR_SURF_16_BANK));
3019
3020                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3021                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3022                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3023                                 NUM_BANKS(ADDR_SURF_16_BANK));
3024
3025                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3027                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3028                                 NUM_BANKS(ADDR_SURF_16_BANK));
3029
3030                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3032                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3033                                 NUM_BANKS(ADDR_SURF_16_BANK));
3034
3035                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3037                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3038                                 NUM_BANKS(ADDR_SURF_16_BANK));
3039
3040                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3042                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3043                                 NUM_BANKS(ADDR_SURF_8_BANK));
3044
3045                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3047                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3048                                 NUM_BANKS(ADDR_SURF_4_BANK));
3049
3050                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3052                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3053                                 NUM_BANKS(ADDR_SURF_4_BANK));
3054
3055                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3056                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3057
3058                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3059                         if (reg_offset != 7)
3060                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3061
3062                 break;
3063         case CHIP_STONEY:
3064                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3065                                 PIPE_CONFIG(ADDR_SURF_P2) |
3066                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3067                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3068                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3069                                 PIPE_CONFIG(ADDR_SURF_P2) |
3070                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3071                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3072                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3073                                 PIPE_CONFIG(ADDR_SURF_P2) |
3074                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3075                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3076                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3077                                 PIPE_CONFIG(ADDR_SURF_P2) |
3078                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3079                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3080                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3081                                 PIPE_CONFIG(ADDR_SURF_P2) |
3082                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3083                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3084                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3085                                 PIPE_CONFIG(ADDR_SURF_P2) |
3086                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3087                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3088                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3089                                 PIPE_CONFIG(ADDR_SURF_P2) |
3090                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3091                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3092                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3093                                 PIPE_CONFIG(ADDR_SURF_P2));
3094                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3095                                 PIPE_CONFIG(ADDR_SURF_P2) |
3096                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3097                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3098                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3099                                  PIPE_CONFIG(ADDR_SURF_P2) |
3100                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3101                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3102                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3103                                  PIPE_CONFIG(ADDR_SURF_P2) |
3104                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3105                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3106                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3107                                  PIPE_CONFIG(ADDR_SURF_P2) |
3108                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3109                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3110                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3111                                  PIPE_CONFIG(ADDR_SURF_P2) |
3112                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3113                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3114                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3115                                  PIPE_CONFIG(ADDR_SURF_P2) |
3116                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3117                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3118                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3119                                  PIPE_CONFIG(ADDR_SURF_P2) |
3120                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3121                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3122                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3123                                  PIPE_CONFIG(ADDR_SURF_P2) |
3124                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3125                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3126                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3127                                  PIPE_CONFIG(ADDR_SURF_P2) |
3128                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3129                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3130                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3131                                  PIPE_CONFIG(ADDR_SURF_P2) |
3132                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3133                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3134                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3135                                  PIPE_CONFIG(ADDR_SURF_P2) |
3136                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3137                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3138                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3139                                  PIPE_CONFIG(ADDR_SURF_P2) |
3140                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3141                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3142                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3143                                  PIPE_CONFIG(ADDR_SURF_P2) |
3144                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3145                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3146                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3147                                  PIPE_CONFIG(ADDR_SURF_P2) |
3148                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3149                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3150                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3151                                  PIPE_CONFIG(ADDR_SURF_P2) |
3152                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3153                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3154                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3155                                  PIPE_CONFIG(ADDR_SURF_P2) |
3156                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3157                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3158                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3159                                  PIPE_CONFIG(ADDR_SURF_P2) |
3160                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3161                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3162                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3163                                  PIPE_CONFIG(ADDR_SURF_P2) |
3164                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3165                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3166
3167                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3168                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3169                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3170                                 NUM_BANKS(ADDR_SURF_8_BANK));
3171                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3172                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3173                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3174                                 NUM_BANKS(ADDR_SURF_8_BANK));
3175                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3176                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3177                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3178                                 NUM_BANKS(ADDR_SURF_8_BANK));
3179                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3180                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3181                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3182                                 NUM_BANKS(ADDR_SURF_8_BANK));
3183                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3184                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3185                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3186                                 NUM_BANKS(ADDR_SURF_8_BANK));
3187                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3188                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3189                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3190                                 NUM_BANKS(ADDR_SURF_8_BANK));
3191                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3192                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3193                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3194                                 NUM_BANKS(ADDR_SURF_8_BANK));
3195                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3196                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3197                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3198                                 NUM_BANKS(ADDR_SURF_16_BANK));
3199                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3200                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3201                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3202                                 NUM_BANKS(ADDR_SURF_16_BANK));
3203                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3204                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3205                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3206                                  NUM_BANKS(ADDR_SURF_16_BANK));
3207                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3208                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3209                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3210                                  NUM_BANKS(ADDR_SURF_16_BANK));
3211                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3212                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3213                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3214                                  NUM_BANKS(ADDR_SURF_16_BANK));
3215                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3216                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3217                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3218                                  NUM_BANKS(ADDR_SURF_16_BANK));
3219                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3220                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3221                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3222                                  NUM_BANKS(ADDR_SURF_8_BANK));
3223
3224                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3225                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3226                             reg_offset != 23)
3227                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3228
3229                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3230                         if (reg_offset != 7)
3231                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3232
3233                 break;
3234         default:
3235                 dev_warn(adev->dev,
3236                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3237                          adev->asic_type);
3238                 /* fall through */
3239
3240         case CHIP_CARRIZO:
3241                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3242                                 PIPE_CONFIG(ADDR_SURF_P2) |
3243                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3244                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3245                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3246                                 PIPE_CONFIG(ADDR_SURF_P2) |
3247                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3248                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3249                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3250                                 PIPE_CONFIG(ADDR_SURF_P2) |
3251                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3252                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3253                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3254                                 PIPE_CONFIG(ADDR_SURF_P2) |
3255                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3256                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3257                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3258                                 PIPE_CONFIG(ADDR_SURF_P2) |
3259                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3260                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3261                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3262                                 PIPE_CONFIG(ADDR_SURF_P2) |
3263                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3264                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3265                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3266                                 PIPE_CONFIG(ADDR_SURF_P2) |
3267                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3268                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3269                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3270                                 PIPE_CONFIG(ADDR_SURF_P2));
3271                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3272                                 PIPE_CONFIG(ADDR_SURF_P2) |
3273                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3274                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3275                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3276                                  PIPE_CONFIG(ADDR_SURF_P2) |
3277                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3278                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3279                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3280                                  PIPE_CONFIG(ADDR_SURF_P2) |
3281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3283                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3284                                  PIPE_CONFIG(ADDR_SURF_P2) |
3285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3287                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3288                                  PIPE_CONFIG(ADDR_SURF_P2) |
3289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3291                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3292                                  PIPE_CONFIG(ADDR_SURF_P2) |
3293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3295                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3296                                  PIPE_CONFIG(ADDR_SURF_P2) |
3297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3299                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3300                                  PIPE_CONFIG(ADDR_SURF_P2) |
3301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3303                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3304                                  PIPE_CONFIG(ADDR_SURF_P2) |
3305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3307                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3308                                  PIPE_CONFIG(ADDR_SURF_P2) |
3309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3311                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3312                                  PIPE_CONFIG(ADDR_SURF_P2) |
3313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3315                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3316                                  PIPE_CONFIG(ADDR_SURF_P2) |
3317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3319                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3320                                  PIPE_CONFIG(ADDR_SURF_P2) |
3321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3323                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3324                                  PIPE_CONFIG(ADDR_SURF_P2) |
3325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3327                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3328                                  PIPE_CONFIG(ADDR_SURF_P2) |
3329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3331                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3332                                  PIPE_CONFIG(ADDR_SURF_P2) |
3333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3335                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3336                                  PIPE_CONFIG(ADDR_SURF_P2) |
3337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3339                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3340                                  PIPE_CONFIG(ADDR_SURF_P2) |
3341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3343
3344                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3345                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3346                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3347                                 NUM_BANKS(ADDR_SURF_8_BANK));
3348                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3349                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3350                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3351                                 NUM_BANKS(ADDR_SURF_8_BANK));
3352                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3353                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3354                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3355                                 NUM_BANKS(ADDR_SURF_8_BANK));
3356                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3357                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3358                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3359                                 NUM_BANKS(ADDR_SURF_8_BANK));
3360                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3361                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3362                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3363                                 NUM_BANKS(ADDR_SURF_8_BANK));
3364                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3365                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3366                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3367                                 NUM_BANKS(ADDR_SURF_8_BANK));
3368                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3370                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3371                                 NUM_BANKS(ADDR_SURF_8_BANK));
3372                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3373                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3374                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3375                                 NUM_BANKS(ADDR_SURF_16_BANK));
3376                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3377                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3378                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379                                 NUM_BANKS(ADDR_SURF_16_BANK));
3380                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3381                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3382                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3383                                  NUM_BANKS(ADDR_SURF_16_BANK));
3384                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3385                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3386                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3387                                  NUM_BANKS(ADDR_SURF_16_BANK));
3388                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3390                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3391                                  NUM_BANKS(ADDR_SURF_16_BANK));
3392                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3394                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3395                                  NUM_BANKS(ADDR_SURF_16_BANK));
3396                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3397                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3398                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3399                                  NUM_BANKS(ADDR_SURF_8_BANK));
3400
3401                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3402                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3403                             reg_offset != 23)
3404                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3405
3406                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3407                         if (reg_offset != 7)
3408                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3409
3410                 break;
3411         }
3412 }
3413
3414 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3415                                   u32 se_num, u32 sh_num, u32 instance)
3416 {
3417         u32 data;
3418
3419         if (instance == 0xffffffff)
3420                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3421         else
3422                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3423
3424         if (se_num == 0xffffffff)
3425                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3426         else
3427                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3428
3429         if (sh_num == 0xffffffff)
3430                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3431         else
3432                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3433
3434         WREG32(mmGRBM_GFX_INDEX, data);
3435 }
3436
3437 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3438                                   u32 me, u32 pipe, u32 q, u32 vm)
3439 {
3440         vi_srbm_select(adev, me, pipe, q, vm);
3441 }
3442
3443 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3444 {
3445         u32 data, mask;
3446
3447         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3448                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3449
3450         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3451
3452         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3453                                          adev->gfx.config.max_sh_per_se);
3454
3455         return (~data) & mask;
3456 }
3457
3458 static void
3459 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3460 {
3461         switch (adev->asic_type) {
3462         case CHIP_FIJI:
3463         case CHIP_VEGAM:
3464                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3465                           RB_XSEL2(1) | PKR_MAP(2) |
3466                           PKR_XSEL(1) | PKR_YSEL(1) |
3467                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3468                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3469                            SE_PAIR_YSEL(2);
3470                 break;
3471         case CHIP_TONGA:
3472         case CHIP_POLARIS10:
3473                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3474                           SE_XSEL(1) | SE_YSEL(1);
3475                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3476                            SE_PAIR_YSEL(2);
3477                 break;
3478         case CHIP_TOPAZ:
3479         case CHIP_CARRIZO:
3480                 *rconf |= RB_MAP_PKR0(2);
3481                 *rconf1 |= 0x0;
3482                 break;
3483         case CHIP_POLARIS11:
3484         case CHIP_POLARIS12:
3485                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3486                           SE_XSEL(1) | SE_YSEL(1);
3487                 *rconf1 |= 0x0;
3488                 break;
3489         case CHIP_STONEY:
3490                 *rconf |= 0x0;
3491                 *rconf1 |= 0x0;
3492                 break;
3493         default:
3494                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3495                 break;
3496         }
3497 }
3498
3499 static void
3500 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3501                                         u32 raster_config, u32 raster_config_1,
3502                                         unsigned rb_mask, unsigned num_rb)
3503 {
3504         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3505         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3506         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3507         unsigned rb_per_se = num_rb / num_se;
3508         unsigned se_mask[4];
3509         unsigned se;
3510
3511         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3512         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3513         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3514         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3515
3516         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3517         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3518         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3519
3520         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3521                              (!se_mask[2] && !se_mask[3]))) {
3522                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3523
3524                 if (!se_mask[0] && !se_mask[1]) {
3525                         raster_config_1 |=
3526                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3527                 } else {
3528                         raster_config_1 |=
3529                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3530                 }
3531         }
3532
3533         for (se = 0; se < num_se; se++) {
3534                 unsigned raster_config_se = raster_config;
3535                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3536                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3537                 int idx = (se / 2) * 2;
3538
3539                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3540                         raster_config_se &= ~SE_MAP_MASK;
3541
3542                         if (!se_mask[idx]) {
3543                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3544                         } else {
3545                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3546                         }
3547                 }
3548
3549                 pkr0_mask &= rb_mask;
3550                 pkr1_mask &= rb_mask;
3551                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3552                         raster_config_se &= ~PKR_MAP_MASK;
3553
3554                         if (!pkr0_mask) {
3555                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3556                         } else {
3557                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3558                         }
3559                 }
3560
3561                 if (rb_per_se >= 2) {
3562                         unsigned rb0_mask = 1 << (se * rb_per_se);
3563                         unsigned rb1_mask = rb0_mask << 1;
3564
3565                         rb0_mask &= rb_mask;
3566                         rb1_mask &= rb_mask;
3567                         if (!rb0_mask || !rb1_mask) {
3568                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3569
3570                                 if (!rb0_mask) {
3571                                         raster_config_se |=
3572                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3573                                 } else {
3574                                         raster_config_se |=
3575                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3576                                 }
3577                         }
3578
3579                         if (rb_per_se > 2) {
3580                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3581                                 rb1_mask = rb0_mask << 1;
3582                                 rb0_mask &= rb_mask;
3583                                 rb1_mask &= rb_mask;
3584                                 if (!rb0_mask || !rb1_mask) {
3585                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3586
3587                                         if (!rb0_mask) {
3588                                                 raster_config_se |=
3589                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3590                                         } else {
3591                                                 raster_config_se |=
3592                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3593                                         }
3594                                 }
3595                         }
3596                 }
3597
3598                 /* GRBM_GFX_INDEX has a different offset on VI */
3599                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3600                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3601                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3602         }
3603
3604         /* GRBM_GFX_INDEX has a different offset on VI */
3605         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3606 }
3607
3608 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3609 {
3610         int i, j;
3611         u32 data;
3612         u32 raster_config = 0, raster_config_1 = 0;
3613         u32 active_rbs = 0;
3614         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3615                                         adev->gfx.config.max_sh_per_se;
3616         unsigned num_rb_pipes;
3617
3618         mutex_lock(&adev->grbm_idx_mutex);
3619         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3620                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3621                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3622                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3623                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3624                                                rb_bitmap_width_per_sh);
3625                 }
3626         }
3627         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3628
3629         adev->gfx.config.backend_enable_mask = active_rbs;
3630         adev->gfx.config.num_rbs = hweight32(active_rbs);
3631
3632         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3633                              adev->gfx.config.max_shader_engines, 16);
3634
3635         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3636
3637         if (!adev->gfx.config.backend_enable_mask ||
3638                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3639                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3640                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3641         } else {
3642                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3643                                                         adev->gfx.config.backend_enable_mask,
3644                                                         num_rb_pipes);
3645         }
3646
3647         /* cache the values for userspace */
3648         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3649                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3650                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3651                         adev->gfx.config.rb_config[i][j].rb_backend_disable =
3652                                 RREG32(mmCC_RB_BACKEND_DISABLE);
3653                         adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3654                                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3655                         adev->gfx.config.rb_config[i][j].raster_config =
3656                                 RREG32(mmPA_SC_RASTER_CONFIG);
3657                         adev->gfx.config.rb_config[i][j].raster_config_1 =
3658                                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3659                 }
3660         }
3661         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3662         mutex_unlock(&adev->grbm_idx_mutex);
3663 }
3664
3665 /**
3666  * gfx_v8_0_init_compute_vmid - gart enable
3667  *
3668  * @adev: amdgpu_device pointer
3669  *
3670  * Initialize compute vmid sh_mem registers
3671  *
3672  */
3673 #define DEFAULT_SH_MEM_BASES    (0x6000)
3674 #define FIRST_COMPUTE_VMID      (8)
3675 #define LAST_COMPUTE_VMID       (16)
3676 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3677 {
3678         int i;
3679         uint32_t sh_mem_config;
3680         uint32_t sh_mem_bases;
3681
3682         /*
3683          * Configure apertures:
3684          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3685          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3686          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3687          */
3688         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3689
3690         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3691                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3692                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3693                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3694                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3695                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3696
3697         mutex_lock(&adev->srbm_mutex);
3698         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3699                 vi_srbm_select(adev, 0, 0, 0, i);
3700                 /* CP and shaders */
3701                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3702                 WREG32(mmSH_MEM_APE1_BASE, 1);
3703                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3704                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3705         }
3706         vi_srbm_select(adev, 0, 0, 0, 0);
3707         mutex_unlock(&adev->srbm_mutex);
3708
3709         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3710            acccess. These should be enabled by FW for target VMIDs. */
3711         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3712                 WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3713                 WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3714                 WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3715                 WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3716         }
3717 }
3718
3719 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3720 {
3721         int vmid;
3722
3723         /*
3724          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3725          * access. Compute VMIDs should be enabled by FW for target VMIDs,
3726          * the driver can enable them for graphics. VMID0 should maintain
3727          * access so that HWS firmware can save/restore entries.
3728          */
3729         for (vmid = 1; vmid < 16; vmid++) {
3730                 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3731                 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3732                 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3733                 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3734         }
3735 }
3736
3737 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3738 {
3739         switch (adev->asic_type) {
3740         default:
3741                 adev->gfx.config.double_offchip_lds_buf = 1;
3742                 break;
3743         case CHIP_CARRIZO:
3744         case CHIP_STONEY:
3745                 adev->gfx.config.double_offchip_lds_buf = 0;
3746                 break;
3747         }
3748 }
3749
3750 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3751 {
3752         u32 tmp, sh_static_mem_cfg;
3753         int i;
3754
3755         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3756         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3757         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3758         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3759
3760         gfx_v8_0_tiling_mode_table_init(adev);
3761         gfx_v8_0_setup_rb(adev);
3762         gfx_v8_0_get_cu_info(adev);
3763         gfx_v8_0_config_init(adev);
3764
3765         /* XXX SH_MEM regs */
3766         /* where to put LDS, scratch, GPUVM in FSA64 space */
3767         sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3768                                    SWIZZLE_ENABLE, 1);
3769         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3770                                    ELEMENT_SIZE, 1);
3771         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3772                                    INDEX_STRIDE, 3);
3773         WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3774
3775         mutex_lock(&adev->srbm_mutex);
3776         for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3777                 vi_srbm_select(adev, 0, 0, 0, i);
3778                 /* CP and shaders */
3779                 if (i == 0) {
3780                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3781                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3782                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3783                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3784                         WREG32(mmSH_MEM_CONFIG, tmp);
3785                         WREG32(mmSH_MEM_BASES, 0);
3786                 } else {
3787                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3788                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3789                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3790                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3791                         WREG32(mmSH_MEM_CONFIG, tmp);
3792                         tmp = adev->gmc.shared_aperture_start >> 48;
3793                         WREG32(mmSH_MEM_BASES, tmp);
3794                 }
3795
3796                 WREG32(mmSH_MEM_APE1_BASE, 1);
3797                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3798         }
3799         vi_srbm_select(adev, 0, 0, 0, 0);
3800         mutex_unlock(&adev->srbm_mutex);
3801
3802         gfx_v8_0_init_compute_vmid(adev);
3803         gfx_v8_0_init_gds_vmid(adev);
3804
3805         mutex_lock(&adev->grbm_idx_mutex);
3806         /*
3807          * making sure that the following register writes will be broadcasted
3808          * to all the shaders
3809          */
3810         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3811
3812         WREG32(mmPA_SC_FIFO_SIZE,
3813                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3814                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3815                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3816                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3817                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3818                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3819                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3820                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3821
3822         tmp = RREG32(mmSPI_ARB_PRIORITY);
3823         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3824         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3825         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3826         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3827         WREG32(mmSPI_ARB_PRIORITY, tmp);
3828
3829         mutex_unlock(&adev->grbm_idx_mutex);
3830
3831 }
3832
3833 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3834 {
3835         u32 i, j, k;
3836         u32 mask;
3837
3838         mutex_lock(&adev->grbm_idx_mutex);
3839         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3840                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3841                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3842                         for (k = 0; k < adev->usec_timeout; k++) {
3843                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3844                                         break;
3845                                 udelay(1);
3846                         }
3847                         if (k == adev->usec_timeout) {
3848                                 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3849                                                       0xffffffff, 0xffffffff);
3850                                 mutex_unlock(&adev->grbm_idx_mutex);
3851                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3852                                          i, j);
3853                                 return;
3854                         }
3855                 }
3856         }
3857         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3858         mutex_unlock(&adev->grbm_idx_mutex);
3859
3860         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3861                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3862                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3863                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3864         for (k = 0; k < adev->usec_timeout; k++) {
3865                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3866                         break;
3867                 udelay(1);
3868         }
3869 }
3870
3871 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3872                                                bool enable)
3873 {
3874         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3875
3876         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3877         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3878         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3879         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3880
3881         WREG32(mmCP_INT_CNTL_RING0, tmp);
3882 }
3883
3884 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3885 {
3886         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3887         /* csib */
3888         WREG32(mmRLC_CSIB_ADDR_HI,
3889                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3890         WREG32(mmRLC_CSIB_ADDR_LO,
3891                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3892         WREG32(mmRLC_CSIB_LENGTH,
3893                         adev->gfx.rlc.clear_state_size);
3894 }
3895
3896 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3897                                 int ind_offset,
3898                                 int list_size,
3899                                 int *unique_indices,
3900                                 int *indices_count,
3901                                 int max_indices,
3902                                 int *ind_start_offsets,
3903                                 int *offset_count,
3904                                 int max_offset)
3905 {
3906         int indices;
3907         bool new_entry = true;
3908
3909         for (; ind_offset < list_size; ind_offset++) {
3910
3911                 if (new_entry) {
3912                         new_entry = false;
3913                         ind_start_offsets[*offset_count] = ind_offset;
3914                         *offset_count = *offset_count + 1;
3915                         BUG_ON(*offset_count >= max_offset);
3916                 }
3917
3918                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3919                         new_entry = true;
3920                         continue;
3921                 }
3922
3923                 ind_offset += 2;
3924
3925                 /* look for the matching indice */
3926                 for (indices = 0;
3927                         indices < *indices_count;
3928                         indices++) {
3929                         if (unique_indices[indices] ==
3930                                 register_list_format[ind_offset])
3931                                 break;
3932                 }
3933
3934                 if (indices >= *indices_count) {
3935                         unique_indices[*indices_count] =
3936                                 register_list_format[ind_offset];
3937                         indices = *indices_count;
3938                         *indices_count = *indices_count + 1;
3939                         BUG_ON(*indices_count >= max_indices);
3940                 }
3941
3942                 register_list_format[ind_offset] = indices;
3943         }
3944 }
3945
3946 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3947 {
3948         int i, temp, data;
3949         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3950         int indices_count = 0;
3951         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3952         int offset_count = 0;
3953
3954         int list_size;
3955         unsigned int *register_list_format =
3956                 kmemdup(adev->gfx.rlc.register_list_format,
3957                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3958         if (!register_list_format)
3959                 return -ENOMEM;
3960
3961         gfx_v8_0_parse_ind_reg_list(register_list_format,
3962                                 RLC_FormatDirectRegListLength,
3963                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3964                                 unique_indices,
3965                                 &indices_count,
3966                                 ARRAY_SIZE(unique_indices),
3967                                 indirect_start_offsets,
3968                                 &offset_count,
3969                                 ARRAY_SIZE(indirect_start_offsets));
3970
3971         /* save and restore list */
3972         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3973
3974         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3975         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3976                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3977
3978         /* indirect list */
3979         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3980         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3981                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3982
3983         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3984         list_size = list_size >> 1;
3985         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3986         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3987
3988         /* starting offsets starts */
3989         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3990                 adev->gfx.rlc.starting_offsets_start);
3991         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3992                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3993                                 indirect_start_offsets[i]);
3994
3995         /* unique indices */
3996         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3997         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3998         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3999                 if (unique_indices[i] != 0) {
4000                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4001                         WREG32(data + i, unique_indices[i] >> 20);
4002                 }
4003         }
4004         kfree(register_list_format);
4005
4006         return 0;
4007 }
4008
4009 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4010 {
4011         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4012 }
4013
4014 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4015 {
4016         uint32_t data;
4017
4018         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4019
4020         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4021         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4022         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4023         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4024         WREG32(mmRLC_PG_DELAY, data);
4025
4026         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4027         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4028
4029 }
4030
/*
 * Set RLC_PG_CNTL.SMU_CLK_SLOWDOWN_ON_PU_ENABLE to 1 when @enable is true,
 * 0 otherwise.
 */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4036
/*
 * Set RLC_PG_CNTL.SMU_CLK_SLOWDOWN_ON_PD_ENABLE to 1 when @enable is true,
 * 0 otherwise.
 */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4042
/*
 * Write RLC_PG_CNTL.CP_PG_DISABLE. The field is a "disable" bit, so the
 * value written is the inverse of @enable: 0 when enabling, 1 otherwise.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4047
4048 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4049 {
4050         if ((adev->asic_type == CHIP_CARRIZO) ||
4051             (adev->asic_type == CHIP_STONEY)) {
4052                 gfx_v8_0_init_csb(adev);
4053                 gfx_v8_0_init_save_restore_list(adev);
4054                 gfx_v8_0_enable_save_restore_machine(adev);
4055                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4056                 gfx_v8_0_init_power_gating(adev);
4057                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4058         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4059                    (adev->asic_type == CHIP_POLARIS12) ||
4060                    (adev->asic_type == CHIP_VEGAM)) {
4061                 gfx_v8_0_init_csb(adev);
4062                 gfx_v8_0_init_save_restore_list(adev);
4063                 gfx_v8_0_enable_save_restore_machine(adev);
4064                 gfx_v8_0_init_power_gating(adev);
4065         }
4066
4067 }
4068
/*
 * Stop the RLC: clear RLC_CNTL.RLC_ENABLE_F32, disable the GUI idle
 * interrupt, then wait for the RLC serdes via
 * gfx_v8_0_wait_for_rlc_serdes().
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4076
/*
 * Pulse GRBM_SOFT_RESET.SOFT_RESET_RLC: set the bit, wait 50us, clear
 * it again and wait another 50us.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	/* assert the RLC soft reset */
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	/* release the RLC soft reset */
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4085
/*
 * Start the RLC by setting RLC_CNTL.RLC_ENABLE_F32, then wait 50us.
 * On non-APU parts the GUI idle interrupt is enabled here as well; on
 * APUs that is deferred (see the comment below).
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4096
4097 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4098 {
4099         if (amdgpu_sriov_vf(adev)) {
4100                 gfx_v8_0_init_csb(adev);
4101                 return 0;
4102         }
4103
4104         adev->gfx.rlc.funcs->stop(adev);
4105         adev->gfx.rlc.funcs->reset(adev);
4106         gfx_v8_0_init_pg(adev);
4107         adev->gfx.rlc.funcs->start(adev);
4108
4109         return 0;
4110 }
4111
4112 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4113 {
4114         int i;
4115         u32 tmp = RREG32(mmCP_ME_CNTL);
4116
4117         if (enable) {
4118                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4119                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4120                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4121         } else {
4122                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4123                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4124                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4125                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4126                         adev->gfx.gfx_ring[i].sched.ready = false;
4127         }
4128         WREG32(mmCP_ME_CNTL, tmp);
4129         udelay(50);
4130 }
4131
4132 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4133 {
4134         u32 count = 0;
4135         const struct cs_section_def *sect = NULL;
4136         const struct cs_extent_def *ext = NULL;
4137
4138         /* begin clear state */
4139         count += 2;
4140         /* context control state */
4141         count += 3;
4142
4143         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4144                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4145                         if (sect->id == SECT_CONTEXT)
4146                                 count += 2 + ext->reg_count;
4147                         else
4148                                 return 0;
4149                 }
4150         }
4151         /* pa_sc_raster_config/pa_sc_raster_config1 */
4152         count += 4;
4153         /* end clear state */
4154         count += 2;
4155         /* clear state */
4156         count += 2;
4157
4158         return count;
4159 }
4160
4161 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4162 {
4163         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4164         const struct cs_section_def *sect = NULL;
4165         const struct cs_extent_def *ext = NULL;
4166         int r, i;
4167
4168         /* init the CP */
4169         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4170         WREG32(mmCP_ENDIAN_SWAP, 0);
4171         WREG32(mmCP_DEVICE_ID, 1);
4172
4173         gfx_v8_0_cp_gfx_enable(adev, true);
4174
4175         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4176         if (r) {
4177                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4178                 return r;
4179         }
4180
4181         /* clear state buffer */
4182         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4183         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4184
4185         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4186         amdgpu_ring_write(ring, 0x80000000);
4187         amdgpu_ring_write(ring, 0x80000000);
4188
4189         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4190                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4191                         if (sect->id == SECT_CONTEXT) {
4192                                 amdgpu_ring_write(ring,
4193                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4194                                                ext->reg_count));
4195                                 amdgpu_ring_write(ring,
4196                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4197                                 for (i = 0; i < ext->reg_count; i++)
4198                                         amdgpu_ring_write(ring, ext->extent[i]);
4199                         }
4200                 }
4201         }
4202
4203         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4204         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4205         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4206         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4207
4208         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4209         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4210
4211         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4212         amdgpu_ring_write(ring, 0);
4213
4214         /* init the CE partitions */
4215         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4216         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4217         amdgpu_ring_write(ring, 0x8000);
4218         amdgpu_ring_write(ring, 0x8000);
4219
4220         amdgpu_ring_commit(ring);
4221
4222         return 0;
4223 }
4224 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4225 {
4226         u32 tmp;
4227         /* no gfx doorbells on iceland */
4228         if (adev->asic_type == CHIP_TOPAZ)
4229                 return;
4230
4231         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4232
4233         if (ring->use_doorbell) {
4234                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4235                                 DOORBELL_OFFSET, ring->doorbell_index);
4236                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4237                                                 DOORBELL_HIT, 0);
4238                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4239                                             DOORBELL_EN, 1);
4240         } else {
4241                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4242         }
4243
4244         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4245
4246         if (adev->flags & AMD_IS_APU)
4247                 return;
4248
4249         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4250                                         DOORBELL_RANGE_LOWER,
4251                                         adev->doorbell_index.gfx_ring0);
4252         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4253
4254         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4255                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4256 }
4257
4258 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4259 {
4260         struct amdgpu_ring *ring;
4261         u32 tmp;
4262         u32 rb_bufsz;
4263         u64 rb_addr, rptr_addr, wptr_gpu_addr;
4264
4265         /* Set the write pointer delay */
4266         WREG32(mmCP_RB_WPTR_DELAY, 0);
4267
4268         /* set the RB to use vmid 0 */
4269         WREG32(mmCP_RB_VMID, 0);
4270
4271         /* Set ring buffer size */
4272         ring = &adev->gfx.gfx_ring[0];
4273         rb_bufsz = order_base_2(ring->ring_size / 8);
4274         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4275         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4276         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4277         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4278 #ifdef __BIG_ENDIAN
4279         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4280 #endif
4281         WREG32(mmCP_RB0_CNTL, tmp);
4282
4283         /* Initialize the ring buffer's read and write pointers */
4284         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4285         ring->wptr = 0;
4286         WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4287
4288         /* set the wb address wether it's enabled or not */
4289         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4290         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4291         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4292
4293         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4294         WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4295         WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4296         mdelay(1);
4297         WREG32(mmCP_RB0_CNTL, tmp);
4298
4299         rb_addr = ring->gpu_addr >> 8;
4300         WREG32(mmCP_RB0_BASE, rb_addr);
4301         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4302
4303         gfx_v8_0_set_cpg_door_bell(adev, ring);
4304         /* start the ring */
4305         amdgpu_ring_clear_ring(ring);
4306         gfx_v8_0_cp_gfx_start(adev);
4307         ring->sched.ready = true;
4308
4309         return 0;
4310 }
4311
4312 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4313 {
4314         int i;
4315
4316         if (enable) {
4317                 WREG32(mmCP_MEC_CNTL, 0);
4318         } else {
4319                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4320                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4321                         adev->gfx.compute_ring[i].sched.ready = false;
4322                 adev->gfx.kiq.ring.sched.ready = false;
4323         }
4324         udelay(50);
4325 }
4326
4327 /* KIQ functions */
4328 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4329 {
4330         uint32_t tmp;
4331         struct amdgpu_device *adev = ring->adev;
4332
4333         /* tell RLC which is KIQ queue */
4334         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4335         tmp &= 0xffffff00;
4336         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4337         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4338         tmp |= 0x80;
4339         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4340 }
4341
4342 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4343 {
4344         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4345         uint64_t queue_mask = 0;
4346         int r, i;
4347
4348         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4349                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4350                         continue;
4351
4352                 /* This situation may be hit in the future if a new HW
4353                  * generation exposes more than 64 queues. If so, the
4354                  * definition of queue_mask needs updating */
4355                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4356                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4357                         break;
4358                 }
4359
4360                 queue_mask |= (1ull << i);
4361         }
4362
4363         r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4364         if (r) {
4365                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4366                 return r;
4367         }
4368         /* set resources */
4369         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4370         amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4371         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4372         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4373         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4374         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4375         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4376         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4377         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4378                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4379                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4380                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4381
4382                 /* map queues */
4383                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4384                 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4385                 amdgpu_ring_write(kiq_ring,
4386                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4387                 amdgpu_ring_write(kiq_ring,
4388                                   PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4389                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4390                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4391                                   PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4392                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4393                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4394                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4395                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4396         }
4397
4398         amdgpu_ring_commit(kiq_ring);
4399
4400         return 0;
4401 }
4402
4403 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4404 {
4405         int i, r = 0;
4406
4407         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4408                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4409                 for (i = 0; i < adev->usec_timeout; i++) {
4410                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4411                                 break;
4412                         udelay(1);
4413                 }
4414                 if (i == adev->usec_timeout)
4415                         r = -ETIMEDOUT;
4416         }
4417         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4418         WREG32(mmCP_HQD_PQ_RPTR, 0);
4419         WREG32(mmCP_HQD_PQ_WPTR, 0);
4420
4421         return r;
4422 }
4423
/*
 * Fill in the memory queue descriptor (MQD) at ring->mqd_ptr for @ring.
 *
 * Most fields are derived from @ring (ring/EOP/MQD GPU addresses, ring
 * size, doorbell settings) or from the writeback buffer; several others
 * are seeded by reading the current CP_HQD_* / CP_MQD_* register values.
 * The callers in this file invoke this with the queue selected through
 * vi_srbm_select() under adev->srbm_mutex.
 *
 * Side effect: ring->wptr is reset to 0.
 *
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* GPU address of the dynamic_cu_mask member inside the MQD allocation */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	/* EOP base is stored shifted right by 8 (256-byte units) */
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	/*
	 * NOTE: cp_hqd_pq_doorbell_control is assigned again, unconditionally,
	 * further down in this function; the value stored here does not
	 * survive.
	 */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	/*
	 * NOTE(review): the value passed for RPTR_BLOCK_SIZE is already
	 * shifted left by 8 before REG_SET_FIELD applies the field's own
	 * shift/mask - confirm this is intended.
	 */
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* doorbell control stays 0 unless the ring uses a doorbell */
	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults */
	/* the remaining fields are copied from the current register values */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* map_queues packet doesn't need activate the queue,
	 * so only kiq need set this field.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd->cp_hqd_active = 1;

	return 0;
}
4569
/*
 * Write the contents of @mqd into the HQD registers.
 *
 * The MQD is treated as an array of dwords starting at
 * cp_mqd_base_addr_lo and indexed by (register - mmCP_MQD_BASE_ADDR);
 * this relies on the struct vi_mqd fields being laid out in register
 * order (NOTE(review): layout is defined outside this function - verify
 * against struct vi_mqd before reordering either).
 *
 * Programming order: wptr polling is disabled first, then the registers
 * from CP_HQD_VMID through CP_HQD_EOP_CONTROL, then the EOP pointers
 * (skipped on Tonga), then CP_HQD_EOP_EVENTS through CP_HQD_ERROR, and
 * last the range CP_MQD_BASE_ADDR through CP_HQD_ACTIVE.
 *
 * Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	/* remaining HQD registers after the EOP pointers */
	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	/* CP_HQD_ACTIVE is the last register written by this loop */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4606
4607 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4608 {
4609         struct amdgpu_device *adev = ring->adev;
4610         struct vi_mqd *mqd = ring->mqd_ptr;
4611         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4612
4613         gfx_v8_0_kiq_setting(ring);
4614
4615         if (adev->in_gpu_reset) { /* for GPU_RESET case */
4616                 /* reset MQD to a clean status */
4617                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4618                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4619
4620                 /* reset ring buffer */
4621                 ring->wptr = 0;
4622                 amdgpu_ring_clear_ring(ring);
4623                 mutex_lock(&adev->srbm_mutex);
4624                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4625                 gfx_v8_0_mqd_commit(adev, mqd);
4626                 vi_srbm_select(adev, 0, 0, 0, 0);
4627                 mutex_unlock(&adev->srbm_mutex);
4628         } else {
4629                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4630                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4631                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4632                 mutex_lock(&adev->srbm_mutex);
4633                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4634                 gfx_v8_0_mqd_init(ring);
4635                 gfx_v8_0_mqd_commit(adev, mqd);
4636                 vi_srbm_select(adev, 0, 0, 0, 0);
4637                 mutex_unlock(&adev->srbm_mutex);
4638
4639                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4640                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4641         }
4642
4643         return 0;
4644 }
4645
4646 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4647 {
4648         struct amdgpu_device *adev = ring->adev;
4649         struct vi_mqd *mqd = ring->mqd_ptr;
4650         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4651
4652         if (!adev->in_gpu_reset && !adev->in_suspend) {
4653                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4654                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4655                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4656                 mutex_lock(&adev->srbm_mutex);
4657                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4658                 gfx_v8_0_mqd_init(ring);
4659                 vi_srbm_select(adev, 0, 0, 0, 0);
4660                 mutex_unlock(&adev->srbm_mutex);
4661
4662                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4663                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4664         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4665                 /* reset MQD to a clean status */
4666                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4667                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4668                 /* reset ring buffer */
4669                 ring->wptr = 0;
4670                 amdgpu_ring_clear_ring(ring);
4671         } else {
4672                 amdgpu_ring_clear_ring(ring);
4673         }
4674         return 0;
4675 }
4676
/*
 * Enable MEC doorbells.
 *
 * On ASICs ordered after CHIP_TONGA the doorbell range registers are
 * programmed first, spanning from the KIQ doorbell index to the
 * mec_ring7 doorbell index (both shifted left by 2).  The
 * CP_PQ_STATUS.DOORBELL_ENABLE bit is then set on every ASIC.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4686
4687 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4688 {
4689         struct amdgpu_ring *ring;
4690         int r;
4691
4692         ring = &adev->gfx.kiq.ring;
4693
4694         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4695         if (unlikely(r != 0))
4696                 return r;
4697
4698         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4699         if (unlikely(r != 0))
4700                 return r;
4701
4702         gfx_v8_0_kiq_init_queue(ring);
4703         amdgpu_bo_kunmap(ring->mqd_obj);
4704         ring->mqd_ptr = NULL;
4705         amdgpu_bo_unreserve(ring->mqd_obj);
4706         ring->sched.ready = true;
4707         return 0;
4708 }
4709
4710 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4711 {
4712         struct amdgpu_ring *ring = NULL;
4713         int r = 0, i;
4714
4715         gfx_v8_0_cp_compute_enable(adev, true);
4716
4717         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4718                 ring = &adev->gfx.compute_ring[i];
4719
4720                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4721                 if (unlikely(r != 0))
4722                         goto done;
4723                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4724                 if (!r) {
4725                         r = gfx_v8_0_kcq_init_queue(ring);
4726                         amdgpu_bo_kunmap(ring->mqd_obj);
4727                         ring->mqd_ptr = NULL;
4728                 }
4729                 amdgpu_bo_unreserve(ring->mqd_obj);
4730                 if (r)
4731                         goto done;
4732         }
4733
4734         gfx_v8_0_set_mec_doorbell_range(adev);
4735
4736         r = gfx_v8_0_kiq_kcq_enable(adev);
4737         if (r)
4738                 goto done;
4739
4740 done:
4741         return r;
4742 }
4743
4744 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4745 {
4746         int r, i;
4747         struct amdgpu_ring *ring;
4748
4749         /* collect all the ring_tests here, gfx, kiq, compute */
4750         ring = &adev->gfx.gfx_ring[0];
4751         r = amdgpu_ring_test_helper(ring);
4752         if (r)
4753                 return r;
4754
4755         ring = &adev->gfx.kiq.ring;
4756         r = amdgpu_ring_test_helper(ring);
4757         if (r)
4758                 return r;
4759
4760         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4761                 ring = &adev->gfx.compute_ring[i];
4762                 amdgpu_ring_test_helper(ring);
4763         }
4764
4765         return 0;
4766 }
4767
4768 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4769 {
4770         int r;
4771
4772         if (!(adev->flags & AMD_IS_APU))
4773                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4774
4775         r = gfx_v8_0_kiq_resume(adev);
4776         if (r)
4777                 return r;
4778
4779         r = gfx_v8_0_cp_gfx_resume(adev);
4780         if (r)
4781                 return r;
4782
4783         r = gfx_v8_0_kcq_resume(adev);
4784         if (r)
4785                 return r;
4786
4787         r = gfx_v8_0_cp_test_all_rings(adev);
4788         if (r)
4789                 return r;
4790
4791         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4792
4793         return 0;
4794 }
4795
/*
 * Forward @enable to both gfx_v8_0_cp_gfx_enable() and
 * gfx_v8_0_cp_compute_enable(), gfx first.
 */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
        gfx_v8_0_cp_gfx_enable(adev, enable);
        gfx_v8_0_cp_compute_enable(adev, enable);
}
4801
4802 static int gfx_v8_0_hw_init(void *handle)
4803 {
4804         int r;
4805         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4806
4807         gfx_v8_0_init_golden_registers(adev);
4808         gfx_v8_0_constants_init(adev);
4809
4810         r = adev->gfx.rlc.funcs->resume(adev);
4811         if (r)
4812                 return r;
4813
4814         r = gfx_v8_0_cp_resume(adev);
4815
4816         return r;
4817 }
4818
4819 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4820 {
4821         int r, i;
4822         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4823
4824         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4825         if (r)
4826                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4827
4828         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4829                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4830
4831                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4832                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4833                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4834                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4835                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4836                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4837                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4838                 amdgpu_ring_write(kiq_ring, 0);
4839                 amdgpu_ring_write(kiq_ring, 0);
4840                 amdgpu_ring_write(kiq_ring, 0);
4841         }
4842         r = amdgpu_ring_test_helper(kiq_ring);
4843         if (r)
4844                 DRM_ERROR("KCQ disable failed\n");
4845
4846         return r;
4847 }
4848
4849 static bool gfx_v8_0_is_idle(void *handle)
4850 {
4851         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4852
4853         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4854                 || RREG32(mmGRBM_STATUS2) != 0x8)
4855                 return false;
4856         else
4857                 return true;
4858 }
4859
4860 static bool gfx_v8_0_rlc_is_idle(void *handle)
4861 {
4862         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4863
4864         if (RREG32(mmGRBM_STATUS2) != 0x8)
4865                 return false;
4866         else
4867                 return true;
4868 }
4869
4870 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4871 {
4872         unsigned int i;
4873         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4874
4875         for (i = 0; i < adev->usec_timeout; i++) {
4876                 if (gfx_v8_0_rlc_is_idle(handle))
4877                         return 0;
4878
4879                 udelay(1);
4880         }
4881         return -ETIMEDOUT;
4882 }
4883
4884 static int gfx_v8_0_wait_for_idle(void *handle)
4885 {
4886         unsigned int i;
4887         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4888
4889         for (i = 0; i < adev->usec_timeout; i++) {
4890                 if (gfx_v8_0_is_idle(handle))
4891                         return 0;
4892
4893                 udelay(1);
4894         }
4895         return -ETIMEDOUT;
4896 }
4897
4898 static int gfx_v8_0_hw_fini(void *handle)
4899 {
4900         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4901
4902         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4903         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4904
4905         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4906
4907         amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4908
4909         /* disable KCQ to avoid CPC touch memory not valid anymore */
4910         gfx_v8_0_kcq_disable(adev);
4911
4912         if (amdgpu_sriov_vf(adev)) {
4913                 pr_debug("For SRIOV client, shouldn't do anything.\n");
4914                 return 0;
4915         }
4916         amdgpu_gfx_rlc_enter_safe_mode(adev);
4917         if (!gfx_v8_0_wait_for_idle(adev))
4918                 gfx_v8_0_cp_enable(adev, false);
4919         else
4920                 pr_err("cp is busy, skip halt cp\n");
4921         if (!gfx_v8_0_wait_for_rlc_idle(adev))
4922                 adev->gfx.rlc.funcs->stop(adev);
4923         else
4924                 pr_err("rlc is busy, skip halt rlc\n");
4925         amdgpu_gfx_rlc_exit_safe_mode(adev);
4926
4927         return 0;
4928 }
4929
/* Suspend callback: identical to hardware teardown, see gfx_v8_0_hw_fini(). */
static int gfx_v8_0_suspend(void *handle)
{
        return gfx_v8_0_hw_fini(handle);
}
4934
/* Resume callback: identical to hardware init, see gfx_v8_0_hw_init(). */
static int gfx_v8_0_resume(void *handle)
{
        return gfx_v8_0_hw_init(handle);
}
4939
4940 static bool gfx_v8_0_check_soft_reset(void *handle)
4941 {
4942         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4943         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4944         u32 tmp;
4945
4946         /* GRBM_STATUS */
4947         tmp = RREG32(mmGRBM_STATUS);
4948         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4949                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4950                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4951                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4952                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4953                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4954                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4955                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4956                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4957                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4958                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4959                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4960                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4961         }
4962
4963         /* GRBM_STATUS2 */
4964         tmp = RREG32(mmGRBM_STATUS2);
4965         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4966                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4967                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4968
4969         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4970             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4971             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4972                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4973                                                 SOFT_RESET_CPF, 1);
4974                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4975                                                 SOFT_RESET_CPC, 1);
4976                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4977                                                 SOFT_RESET_CPG, 1);
4978                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4979                                                 SOFT_RESET_GRBM, 1);
4980         }
4981
4982         /* SRBM_STATUS */
4983         tmp = RREG32(mmSRBM_STATUS);
4984         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4985                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4986                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4987         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
4988                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4989                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
4990
4991         if (grbm_soft_reset || srbm_soft_reset) {
4992                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
4993                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
4994                 return true;
4995         } else {
4996                 adev->gfx.grbm_soft_reset = 0;
4997                 adev->gfx.srbm_soft_reset = 0;
4998                 return false;
4999         }
5000 }
5001
5002 static int gfx_v8_0_pre_soft_reset(void *handle)
5003 {
5004         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5005         u32 grbm_soft_reset = 0;
5006
5007         if ((!adev->gfx.grbm_soft_reset) &&
5008             (!adev->gfx.srbm_soft_reset))
5009                 return 0;
5010
5011         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5012
5013         /* stop the rlc */
5014         adev->gfx.rlc.funcs->stop(adev);
5015
5016         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5017             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5018                 /* Disable GFX parsing/prefetching */
5019                 gfx_v8_0_cp_gfx_enable(adev, false);
5020
5021         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5022             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5023             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5024             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5025                 int i;
5026
5027                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5028                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5029
5030                         mutex_lock(&adev->srbm_mutex);
5031                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5032                         gfx_v8_0_deactivate_hqd(adev, 2);
5033                         vi_srbm_select(adev, 0, 0, 0, 0);
5034                         mutex_unlock(&adev->srbm_mutex);
5035                 }
5036                 /* Disable MEC parsing/prefetching */
5037                 gfx_v8_0_cp_compute_enable(adev, false);
5038         }
5039
5040        return 0;
5041 }
5042
5043 static int gfx_v8_0_soft_reset(void *handle)
5044 {
5045         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5046         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5047         u32 tmp;
5048
5049         if ((!adev->gfx.grbm_soft_reset) &&
5050             (!adev->gfx.srbm_soft_reset))
5051                 return 0;
5052
5053         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5054         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5055
5056         if (grbm_soft_reset || srbm_soft_reset) {
5057                 tmp = RREG32(mmGMCON_DEBUG);
5058                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5059                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5060                 WREG32(mmGMCON_DEBUG, tmp);
5061                 udelay(50);
5062         }
5063
5064         if (grbm_soft_reset) {
5065                 tmp = RREG32(mmGRBM_SOFT_RESET);
5066                 tmp |= grbm_soft_reset;
5067                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5068                 WREG32(mmGRBM_SOFT_RESET, tmp);
5069                 tmp = RREG32(mmGRBM_SOFT_RESET);
5070
5071                 udelay(50);
5072
5073                 tmp &= ~grbm_soft_reset;
5074                 WREG32(mmGRBM_SOFT_RESET, tmp);
5075                 tmp = RREG32(mmGRBM_SOFT_RESET);
5076         }
5077
5078         if (srbm_soft_reset) {
5079                 tmp = RREG32(mmSRBM_SOFT_RESET);
5080                 tmp |= srbm_soft_reset;
5081                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5082                 WREG32(mmSRBM_SOFT_RESET, tmp);
5083                 tmp = RREG32(mmSRBM_SOFT_RESET);
5084
5085                 udelay(50);
5086
5087                 tmp &= ~srbm_soft_reset;
5088                 WREG32(mmSRBM_SOFT_RESET, tmp);
5089                 tmp = RREG32(mmSRBM_SOFT_RESET);
5090         }
5091
5092         if (grbm_soft_reset || srbm_soft_reset) {
5093                 tmp = RREG32(mmGMCON_DEBUG);
5094                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5095                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5096                 WREG32(mmGMCON_DEBUG, tmp);
5097         }
5098
5099         /* Wait a little for things to settle down */
5100         udelay(50);
5101
5102         return 0;
5103 }
5104
5105 static int gfx_v8_0_post_soft_reset(void *handle)
5106 {
5107         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5108         u32 grbm_soft_reset = 0;
5109
5110         if ((!adev->gfx.grbm_soft_reset) &&
5111             (!adev->gfx.srbm_soft_reset))
5112                 return 0;
5113
5114         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5115
5116         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5117             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5118             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5119             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5120                 int i;
5121
5122                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5123                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5124
5125                         mutex_lock(&adev->srbm_mutex);
5126                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5127                         gfx_v8_0_deactivate_hqd(adev, 2);
5128                         vi_srbm_select(adev, 0, 0, 0, 0);
5129                         mutex_unlock(&adev->srbm_mutex);
5130                 }
5131                 gfx_v8_0_kiq_resume(adev);
5132                 gfx_v8_0_kcq_resume(adev);
5133         }
5134
5135         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5136             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5137                 gfx_v8_0_cp_gfx_resume(adev);
5138
5139         gfx_v8_0_cp_test_all_rings(adev);
5140
5141         adev->gfx.rlc.funcs->start(adev);
5142
5143         return 0;
5144 }
5145
5146 /**
5147  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5148  *
5149  * @adev: amdgpu_device pointer
5150  *
5151  * Fetches a GPU clock counter snapshot.
5152  * Returns the 64 bit clock counter snapshot.
5153  */
5154 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5155 {
5156         uint64_t clock;
5157
5158         mutex_lock(&adev->gfx.gpu_clock_mutex);
5159         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5160         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5161                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5162         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5163         return clock;
5164 }
5165
5166 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5167                                           uint32_t vmid,
5168                                           uint32_t gds_base, uint32_t gds_size,
5169                                           uint32_t gws_base, uint32_t gws_size,
5170                                           uint32_t oa_base, uint32_t oa_size)
5171 {
5172         /* GDS Base */
5173         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5174         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5175                                 WRITE_DATA_DST_SEL(0)));
5176         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5177         amdgpu_ring_write(ring, 0);
5178         amdgpu_ring_write(ring, gds_base);
5179
5180         /* GDS Size */
5181         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5182         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5183                                 WRITE_DATA_DST_SEL(0)));
5184         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5185         amdgpu_ring_write(ring, 0);
5186         amdgpu_ring_write(ring, gds_size);
5187
5188         /* GWS */
5189         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5190         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5191                                 WRITE_DATA_DST_SEL(0)));
5192         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5193         amdgpu_ring_write(ring, 0);
5194         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5195
5196         /* OA */
5197         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5198         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5199                                 WRITE_DATA_DST_SEL(0)));
5200         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5201         amdgpu_ring_write(ring, 0);
5202         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5203 }
5204
5205 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5206 {
5207         WREG32(mmSQ_IND_INDEX,
5208                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5209                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5210                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5211                 (SQ_IND_INDEX__FORCE_READ_MASK));
5212         return RREG32(mmSQ_IND_DATA);
5213 }
5214
5215 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5216                            uint32_t wave, uint32_t thread,
5217                            uint32_t regno, uint32_t num, uint32_t *out)
5218 {
5219         WREG32(mmSQ_IND_INDEX,
5220                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5221                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5222                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5223                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5224                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5225                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5226         while (num--)
5227                 *(out++) = RREG32(mmSQ_IND_DATA);
5228 }
5229
5230 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5231 {
5232         /* type 0 wave data */
5233         dst[(*no_fields)++] = 0;
5234         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5235         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5236         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5237         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5238         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5239         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5240         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5241         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5242         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5243         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5244         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5245         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5246         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5247         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5248         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5249         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5250         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5251         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5252 }
5253
5254 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5255                                      uint32_t wave, uint32_t start,
5256                                      uint32_t size, uint32_t *dst)
5257 {
5258         wave_read_regs(
5259                 adev, simd, wave, 0,
5260                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5261 }
5262
5263
5264 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5265         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5266         .select_se_sh = &gfx_v8_0_select_se_sh,
5267         .read_wave_data = &gfx_v8_0_read_wave_data,
5268         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5269         .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5270 };
5271
5272 static int gfx_v8_0_early_init(void *handle)
5273 {
5274         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5275
5276         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5277         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5278         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5279         gfx_v8_0_set_ring_funcs(adev);
5280         gfx_v8_0_set_irq_funcs(adev);
5281         gfx_v8_0_set_gds_init(adev);
5282         gfx_v8_0_set_rlc_funcs(adev);
5283
5284         return 0;
5285 }
5286
5287 static int gfx_v8_0_late_init(void *handle)
5288 {
5289         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5290         int r;
5291
5292         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5293         if (r)
5294                 return r;
5295
5296         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5297         if (r)
5298                 return r;
5299
5300         /* requires IBs so do in late init after IB pool is initialized */
5301         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5302         if (r)
5303                 return r;
5304
5305         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5306         if (r) {
5307                 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5308                 return r;
5309         }
5310
5311         r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5312         if (r) {
5313                 DRM_ERROR(
5314                         "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5315                         r);
5316                 return r;
5317         }
5318
5319         return 0;
5320 }
5321
5322 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5323                                                        bool enable)
5324 {
5325         if (((adev->asic_type == CHIP_POLARIS11) ||
5326             (adev->asic_type == CHIP_POLARIS12) ||
5327             (adev->asic_type == CHIP_VEGAM)) &&
5328             adev->powerplay.pp_funcs->set_powergating_by_smu)
5329                 /* Send msg to SMU via Powerplay */
5330                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5331
5332         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5333 }
5334
5335 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5336                                                         bool enable)
5337 {
5338         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5339 }
5340
5341 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5342                 bool enable)
5343 {
5344         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5345 }
5346
5347 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5348                                           bool enable)
5349 {
5350         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5351 }
5352
5353 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5354                                                 bool enable)
5355 {
5356         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5357
5358         /* Read any GFX register to wake up GFX. */
5359         if (!enable)
5360                 RREG32(mmDB_RENDER_CONTROL);
5361 }
5362
5363 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5364                                           bool enable)
5365 {
5366         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5367                 cz_enable_gfx_cg_power_gating(adev, true);
5368                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5369                         cz_enable_gfx_pipeline_power_gating(adev, true);
5370         } else {
5371                 cz_enable_gfx_cg_power_gating(adev, false);
5372                 cz_enable_gfx_pipeline_power_gating(adev, false);
5373         }
5374 }
5375
5376 static int gfx_v8_0_set_powergating_state(void *handle,
5377                                           enum amd_powergating_state state)
5378 {
5379         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5380         bool enable = (state == AMD_PG_STATE_GATE);
5381
5382         if (amdgpu_sriov_vf(adev))
5383                 return 0;
5384
5385         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5386                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5387                                 AMD_PG_SUPPORT_CP |
5388                                 AMD_PG_SUPPORT_GFX_DMG))
5389                 amdgpu_gfx_rlc_enter_safe_mode(adev);
5390         switch (adev->asic_type) {
5391         case CHIP_CARRIZO:
5392         case CHIP_STONEY:
5393
5394                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5395                         cz_enable_sck_slow_down_on_power_up(adev, true);
5396                         cz_enable_sck_slow_down_on_power_down(adev, true);
5397                 } else {
5398                         cz_enable_sck_slow_down_on_power_up(adev, false);
5399                         cz_enable_sck_slow_down_on_power_down(adev, false);
5400                 }
5401                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5402                         cz_enable_cp_power_gating(adev, true);
5403                 else
5404                         cz_enable_cp_power_gating(adev, false);
5405
5406                 cz_update_gfx_cg_power_gating(adev, enable);
5407
5408                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5409                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5410                 else
5411                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5412
5413                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5414                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5415                 else
5416                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5417                 break;
5418         case CHIP_POLARIS11:
5419         case CHIP_POLARIS12:
5420         case CHIP_VEGAM:
5421                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5422                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5423                 else
5424                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5425
5426                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5427                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5428                 else
5429                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5430
5431                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5432                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5433                 else
5434                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5435                 break;
5436         default:
5437                 break;
5438         }
5439         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5440                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5441                                 AMD_PG_SUPPORT_CP |
5442                                 AMD_PG_SUPPORT_GFX_DMG))
5443                 amdgpu_gfx_rlc_exit_safe_mode(adev);
5444         return 0;
5445 }
5446
5447 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5448 {
5449         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5450         int data;
5451
5452         if (amdgpu_sriov_vf(adev))
5453                 *flags = 0;
5454
5455         /* AMD_CG_SUPPORT_GFX_MGCG */
5456         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5457         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5458                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5459
5460         /* AMD_CG_SUPPORT_GFX_CGLG */
5461         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5462         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5463                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5464
5465         /* AMD_CG_SUPPORT_GFX_CGLS */
5466         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5467                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5468
5469         /* AMD_CG_SUPPORT_GFX_CGTS */
5470         data = RREG32(mmCGTS_SM_CTRL_REG);
5471         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5472                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5473
5474         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5475         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5476                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5477
5478         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5479         data = RREG32(mmRLC_MEM_SLP_CNTL);
5480         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5481                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5482
5483         /* AMD_CG_SUPPORT_GFX_CP_LS */
5484         data = RREG32(mmCP_MEM_SLP_CNTL);
5485         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5486                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5487 }
5488
5489 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5490                                      uint32_t reg_addr, uint32_t cmd)
5491 {
5492         uint32_t data;
5493
5494         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5495
5496         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5497         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5498
5499         data = RREG32(mmRLC_SERDES_WR_CTRL);
5500         if (adev->asic_type == CHIP_STONEY)
5501                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5502                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5503                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5504                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5505                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5506                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5507                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5508                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5509                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5510         else
5511                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5512                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5513                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5514                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5515                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5516                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5517                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5518                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5519                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5520                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5521                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5522         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5523                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5524                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5525                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5526
5527         WREG32(mmRLC_SERDES_WR_CTRL, data);
5528 }
5529
/* RLC safe-mode message values */
#define MSG_ENTER_RLC_SAFE_MODE		1
#define MSG_EXIT_RLC_SAFE_MODE		0

/* RLC_GPR_REG2 field layout: REQ mask/shift, then MESSAGE mask/shift */
#define RLC_GPR_REG2__REQ_MASK		0x1
#define RLC_GPR_REG2__REQ__SHIFT	0
#define RLC_GPR_REG2__MESSAGE_MASK	0x1e
#define RLC_GPR_REG2__MESSAGE__SHIFT	0x1
5536
5537 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5538 {
5539         uint32_t rlc_setting;
5540
5541         rlc_setting = RREG32(mmRLC_CNTL);
5542         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5543                 return false;
5544
5545         return true;
5546 }
5547
5548 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5549 {
5550         uint32_t data;
5551         unsigned i;
5552         data = RREG32(mmRLC_CNTL);
5553         data |= RLC_SAFE_MODE__CMD_MASK;
5554         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5555         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5556         WREG32(mmRLC_SAFE_MODE, data);
5557
5558         /* wait for RLC_SAFE_MODE */
5559         for (i = 0; i < adev->usec_timeout; i++) {
5560                 if ((RREG32(mmRLC_GPM_STAT) &
5561                      (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5562                       RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5563                     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5564                      RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5565                         break;
5566                 udelay(1);
5567         }
5568         for (i = 0; i < adev->usec_timeout; i++) {
5569                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5570                         break;
5571                 udelay(1);
5572         }
5573 }
5574
5575 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5576 {
5577         uint32_t data;
5578         unsigned i;
5579
5580         data = RREG32(mmRLC_CNTL);
5581         data |= RLC_SAFE_MODE__CMD_MASK;
5582         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5583         WREG32(mmRLC_SAFE_MODE, data);
5584
5585         for (i = 0; i < adev->usec_timeout; i++) {
5586                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5587                         break;
5588                 udelay(1);
5589         }
5590 }
5591
5592 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5593         .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5594         .set_safe_mode = gfx_v8_0_set_safe_mode,
5595         .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5596         .init = gfx_v8_0_rlc_init,
5597         .get_csb_size = gfx_v8_0_get_csb_size,
5598         .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5599         .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5600         .resume = gfx_v8_0_rlc_resume,
5601         .stop = gfx_v8_0_rlc_stop,
5602         .reset = gfx_v8_0_rlc_reset,
5603         .start = gfx_v8_0_rlc_start
5604 };
5605
5606 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5607                                                       bool enable)
5608 {
5609         uint32_t temp, data;
5610
5611         amdgpu_gfx_rlc_enter_safe_mode(adev);
5612
5613         /* It is disabled by HW by default */
5614         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5615                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5616                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5617                                 /* 1 - RLC memory Light sleep */
5618                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5619
5620                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5621                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5622                 }
5623
5624                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5625                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5626                 if (adev->flags & AMD_IS_APU)
5627                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5628                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5629                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5630                 else
5631                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5632                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5633                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5634                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5635
5636                 if (temp != data)
5637                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5638
5639                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5640                 gfx_v8_0_wait_for_rlc_serdes(adev);
5641
5642                 /* 5 - clear mgcg override */
5643                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5644
5645                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5646                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5647                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5648                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5649                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5650                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5651                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5652                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5653                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5654                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5655                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5656                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5657                         if (temp != data)
5658                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5659                 }
5660                 udelay(50);
5661
5662                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5663                 gfx_v8_0_wait_for_rlc_serdes(adev);
5664         } else {
5665                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5666                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5667                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5668                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5669                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5670                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5671                 if (temp != data)
5672                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5673
5674                 /* 2 - disable MGLS in RLC */
5675                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5676                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5677                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5678                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5679                 }
5680
5681                 /* 3 - disable MGLS in CP */
5682                 data = RREG32(mmCP_MEM_SLP_CNTL);
5683                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5684                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5685                         WREG32(mmCP_MEM_SLP_CNTL, data);
5686                 }
5687
5688                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5689                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5690                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5691                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5692                 if (temp != data)
5693                         WREG32(mmCGTS_SM_CTRL_REG, data);
5694
5695                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5696                 gfx_v8_0_wait_for_rlc_serdes(adev);
5697
5698                 /* 6 - set mgcg override */
5699                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5700
5701                 udelay(50);
5702
5703                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5704                 gfx_v8_0_wait_for_rlc_serdes(adev);
5705         }
5706
5707         amdgpu_gfx_rlc_exit_safe_mode(adev);
5708 }
5709
5710 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5711                                                       bool enable)
5712 {
5713         uint32_t temp, temp1, data, data1;
5714
5715         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5716
5717         amdgpu_gfx_rlc_enter_safe_mode(adev);
5718
5719         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5720                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5721                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5722                 if (temp1 != data1)
5723                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5724
5725                 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5726                 gfx_v8_0_wait_for_rlc_serdes(adev);
5727
5728                 /* 2 - clear cgcg override */
5729                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5730
5731                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5732                 gfx_v8_0_wait_for_rlc_serdes(adev);
5733
5734                 /* 3 - write cmd to set CGLS */
5735                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5736
5737                 /* 4 - enable cgcg */
5738                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5739
5740                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5741                         /* enable cgls*/
5742                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5743
5744                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5745                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5746
5747                         if (temp1 != data1)
5748                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5749                 } else {
5750                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5751                 }
5752
5753                 if (temp != data)
5754                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5755
5756                 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5757                  * Cmp_busy/GFX_Idle interrupts
5758                  */
5759                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5760         } else {
5761                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5762                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5763
5764                 /* TEST CGCG */
5765                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5766                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5767                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5768                 if (temp1 != data1)
5769                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5770
5771                 /* read gfx register to wake up cgcg */
5772                 RREG32(mmCB_CGTT_SCLK_CTRL);
5773                 RREG32(mmCB_CGTT_SCLK_CTRL);
5774                 RREG32(mmCB_CGTT_SCLK_CTRL);
5775                 RREG32(mmCB_CGTT_SCLK_CTRL);
5776
5777                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5778                 gfx_v8_0_wait_for_rlc_serdes(adev);
5779
5780                 /* write cmd to Set CGCG Overrride */
5781                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5782
5783                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5784                 gfx_v8_0_wait_for_rlc_serdes(adev);
5785
5786                 /* write cmd to Clear CGLS */
5787                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5788
5789                 /* disable cgcg, cgls should be disabled too. */
5790                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5791                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5792                 if (temp != data)
5793                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5794                 /* enable interrupts again for PG */
5795                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5796         }
5797
5798         gfx_v8_0_wait_for_rlc_serdes(adev);
5799
5800         amdgpu_gfx_rlc_exit_safe_mode(adev);
5801 }
5802 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5803                                             bool enable)
5804 {
5805         if (enable) {
5806                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5807                  * ===  MGCG + MGLS + TS(CG/LS) ===
5808                  */
5809                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5810                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5811         } else {
5812                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5813                  * ===  CGCG + CGLS ===
5814                  */
5815                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5816                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5817         }
5818         return 0;
5819 }
5820
5821 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5822                                           enum amd_clockgating_state state)
5823 {
5824         uint32_t msg_id, pp_state = 0;
5825         uint32_t pp_support_state = 0;
5826
5827         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5828                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5829                         pp_support_state = PP_STATE_SUPPORT_LS;
5830                         pp_state = PP_STATE_LS;
5831                 }
5832                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5833                         pp_support_state |= PP_STATE_SUPPORT_CG;
5834                         pp_state |= PP_STATE_CG;
5835                 }
5836                 if (state == AMD_CG_STATE_UNGATE)
5837                         pp_state = 0;
5838
5839                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5840                                 PP_BLOCK_GFX_CG,
5841                                 pp_support_state,
5842                                 pp_state);
5843                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5844                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5845         }
5846
5847         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5848                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5849                         pp_support_state = PP_STATE_SUPPORT_LS;
5850                         pp_state = PP_STATE_LS;
5851                 }
5852
5853                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5854                         pp_support_state |= PP_STATE_SUPPORT_CG;
5855                         pp_state |= PP_STATE_CG;
5856                 }
5857
5858                 if (state == AMD_CG_STATE_UNGATE)
5859                         pp_state = 0;
5860
5861                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5862                                 PP_BLOCK_GFX_MG,
5863                                 pp_support_state,
5864                                 pp_state);
5865                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5866                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5867         }
5868
5869         return 0;
5870 }
5871
5872 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5873                                           enum amd_clockgating_state state)
5874 {
5875
5876         uint32_t msg_id, pp_state = 0;
5877         uint32_t pp_support_state = 0;
5878
5879         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5880                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5881                         pp_support_state = PP_STATE_SUPPORT_LS;
5882                         pp_state = PP_STATE_LS;
5883                 }
5884                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5885                         pp_support_state |= PP_STATE_SUPPORT_CG;
5886                         pp_state |= PP_STATE_CG;
5887                 }
5888                 if (state == AMD_CG_STATE_UNGATE)
5889                         pp_state = 0;
5890
5891                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5892                                 PP_BLOCK_GFX_CG,
5893                                 pp_support_state,
5894                                 pp_state);
5895                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5896                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5897         }
5898
5899         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5900                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5901                         pp_support_state = PP_STATE_SUPPORT_LS;
5902                         pp_state = PP_STATE_LS;
5903                 }
5904                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5905                         pp_support_state |= PP_STATE_SUPPORT_CG;
5906                         pp_state |= PP_STATE_CG;
5907                 }
5908                 if (state == AMD_CG_STATE_UNGATE)
5909                         pp_state = 0;
5910
5911                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5912                                 PP_BLOCK_GFX_3D,
5913                                 pp_support_state,
5914                                 pp_state);
5915                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5916                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5917         }
5918
5919         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5920                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5921                         pp_support_state = PP_STATE_SUPPORT_LS;
5922                         pp_state = PP_STATE_LS;
5923                 }
5924
5925                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5926                         pp_support_state |= PP_STATE_SUPPORT_CG;
5927                         pp_state |= PP_STATE_CG;
5928                 }
5929
5930                 if (state == AMD_CG_STATE_UNGATE)
5931                         pp_state = 0;
5932
5933                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5934                                 PP_BLOCK_GFX_MG,
5935                                 pp_support_state,
5936                                 pp_state);
5937                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5938                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5939         }
5940
5941         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5942                 pp_support_state = PP_STATE_SUPPORT_LS;
5943
5944                 if (state == AMD_CG_STATE_UNGATE)
5945                         pp_state = 0;
5946                 else
5947                         pp_state = PP_STATE_LS;
5948
5949                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5950                                 PP_BLOCK_GFX_RLC,
5951                                 pp_support_state,
5952                                 pp_state);
5953                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5954                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5955         }
5956
5957         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5958                 pp_support_state = PP_STATE_SUPPORT_LS;
5959
5960                 if (state == AMD_CG_STATE_UNGATE)
5961                         pp_state = 0;
5962                 else
5963                         pp_state = PP_STATE_LS;
5964                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5965                         PP_BLOCK_GFX_CP,
5966                         pp_support_state,
5967                         pp_state);
5968                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5969                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5970         }
5971
5972         return 0;
5973 }
5974
5975 static int gfx_v8_0_set_clockgating_state(void *handle,
5976                                           enum amd_clockgating_state state)
5977 {
5978         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5979
5980         if (amdgpu_sriov_vf(adev))
5981                 return 0;
5982
5983         switch (adev->asic_type) {
5984         case CHIP_FIJI:
5985         case CHIP_CARRIZO:
5986         case CHIP_STONEY:
5987                 gfx_v8_0_update_gfx_clock_gating(adev,
5988                                                  state == AMD_CG_STATE_GATE);
5989                 break;
5990         case CHIP_TONGA:
5991                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5992                 break;
5993         case CHIP_POLARIS10:
5994         case CHIP_POLARIS11:
5995         case CHIP_POLARIS12:
5996         case CHIP_VEGAM:
5997                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5998                 break;
5999         default:
6000                 break;
6001         }
6002         return 0;
6003 }
6004
6005 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6006 {
6007         return ring->adev->wb.wb[ring->rptr_offs];
6008 }
6009
6010 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6011 {
6012         struct amdgpu_device *adev = ring->adev;
6013
6014         if (ring->use_doorbell)
6015                 /* XXX check if swapping is necessary on BE */
6016                 return ring->adev->wb.wb[ring->wptr_offs];
6017         else
6018                 return RREG32(mmCP_RB0_WPTR);
6019 }
6020
6021 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6022 {
6023         struct amdgpu_device *adev = ring->adev;
6024
6025         if (ring->use_doorbell) {
6026                 /* XXX check if swapping is necessary on BE */
6027                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6028                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6029         } else {
6030                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6031                 (void)RREG32(mmCP_RB0_WPTR);
6032         }
6033 }
6034
6035 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6036 {
6037         u32 ref_and_mask, reg_mem_engine;
6038
6039         if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6040             (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6041                 switch (ring->me) {
6042                 case 1:
6043                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6044                         break;
6045                 case 2:
6046                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6047                         break;
6048                 default:
6049                         return;
6050                 }
6051                 reg_mem_engine = 0;
6052         } else {
6053                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6054                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6055         }
6056
6057         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6058         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6059                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6060                                  reg_mem_engine));
6061         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6062         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6063         amdgpu_ring_write(ring, ref_and_mask);
6064         amdgpu_ring_write(ring, ref_and_mask);
6065         amdgpu_ring_write(ring, 0x20); /* poll interval */
6066 }
6067
6068 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6069 {
6070         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6071         amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6072                 EVENT_INDEX(4));
6073
6074         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6075         amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6076                 EVENT_INDEX(0));
6077 }
6078
6079 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6080                                         struct amdgpu_job *job,
6081                                         struct amdgpu_ib *ib,
6082                                         uint32_t flags)
6083 {
6084         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6085         u32 header, control = 0;
6086
6087         if (ib->flags & AMDGPU_IB_FLAG_CE)
6088                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6089         else
6090                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6091
6092         control |= ib->length_dw | (vmid << 24);
6093
6094         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6095                 control |= INDIRECT_BUFFER_PRE_ENB(1);
6096
6097                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6098                         gfx_v8_0_ring_emit_de_meta(ring);
6099         }
6100
6101         amdgpu_ring_write(ring, header);
6102         amdgpu_ring_write(ring,
6103 #ifdef __BIG_ENDIAN
6104                           (2 << 0) |
6105 #endif
6106                           (ib->gpu_addr & 0xFFFFFFFC));
6107         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6108         amdgpu_ring_write(ring, control);
6109 }
6110
6111 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6112                                           struct amdgpu_job *job,
6113                                           struct amdgpu_ib *ib,
6114                                           uint32_t flags)
6115 {
6116         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6117         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6118
6119         /* Currently, there is a high possibility to get wave ID mismatch
6120          * between ME and GDS, leading to a hw deadlock, because ME generates
6121          * different wave IDs than the GDS expects. This situation happens
6122          * randomly when at least 5 compute pipes use GDS ordered append.
6123          * The wave IDs generated by ME are also wrong after suspend/resume.
6124          * Those are probably bugs somewhere else in the kernel driver.
6125          *
6126          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6127          * GDS to 0 for this ring (me/pipe).
6128          */
6129         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6130                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6131                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6132                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6133         }
6134
6135         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6136         amdgpu_ring_write(ring,
6137 #ifdef __BIG_ENDIAN
6138                                 (2 << 0) |
6139 #endif
6140                                 (ib->gpu_addr & 0xFFFFFFFC));
6141         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6142         amdgpu_ring_write(ring, control);
6143 }
6144
6145 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6146                                          u64 seq, unsigned flags)
6147 {
6148         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6149         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6150
6151         /* Workaround for cache flush problems. First send a dummy EOP
6152          * event down the pipe with seq one below.
6153          */
6154         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6155         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6156                                  EOP_TC_ACTION_EN |
6157                                  EOP_TC_WB_ACTION_EN |
6158                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6159                                  EVENT_INDEX(5)));
6160         amdgpu_ring_write(ring, addr & 0xfffffffc);
6161         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6162                                 DATA_SEL(1) | INT_SEL(0));
6163         amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6164         amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6165
6166         /* Then send the real EOP event down the pipe:
6167          * EVENT_WRITE_EOP - flush caches, send int */
6168         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6169         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6170                                  EOP_TC_ACTION_EN |
6171                                  EOP_TC_WB_ACTION_EN |
6172                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6173                                  EVENT_INDEX(5)));
6174         amdgpu_ring_write(ring, addr & 0xfffffffc);
6175         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6176                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6177         amdgpu_ring_write(ring, lower_32_bits(seq));
6178         amdgpu_ring_write(ring, upper_32_bits(seq));
6179
6180 }
6181
6182 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6183 {
6184         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6185         uint32_t seq = ring->fence_drv.sync_seq;
6186         uint64_t addr = ring->fence_drv.gpu_addr;
6187
6188         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6189         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6190                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6191                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6192         amdgpu_ring_write(ring, addr & 0xfffffffc);
6193         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6194         amdgpu_ring_write(ring, seq);
6195         amdgpu_ring_write(ring, 0xffffffff);
6196         amdgpu_ring_write(ring, 4); /* poll interval */
6197 }
6198
6199 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6200                                         unsigned vmid, uint64_t pd_addr)
6201 {
6202         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6203
6204         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6205
6206         /* wait for the invalidate to complete */
6207         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6208         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6209                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6210                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6211         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6212         amdgpu_ring_write(ring, 0);
6213         amdgpu_ring_write(ring, 0); /* ref */
6214         amdgpu_ring_write(ring, 0); /* mask */
6215         amdgpu_ring_write(ring, 0x20); /* poll interval */
6216
6217         /* compute doesn't have PFP */
6218         if (usepfp) {
6219                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6220                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6221                 amdgpu_ring_write(ring, 0x0);
6222         }
6223 }
6224
6225 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6226 {
6227         return ring->adev->wb.wb[ring->wptr_offs];
6228 }
6229
6230 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6231 {
6232         struct amdgpu_device *adev = ring->adev;
6233
6234         /* XXX check if swapping is necessary on BE */
6235         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6236         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6237 }
6238
6239 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6240                                            bool acquire)
6241 {
6242         struct amdgpu_device *adev = ring->adev;
6243         int pipe_num, tmp, reg;
6244         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6245
6246         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6247
6248         /* first me only has 2 entries, GFX and HP3D */
6249         if (ring->me > 0)
6250                 pipe_num -= 2;
6251
6252         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6253         tmp = RREG32(reg);
6254         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6255         WREG32(reg, tmp);
6256 }
6257
6258 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6259                                             struct amdgpu_ring *ring,
6260                                             bool acquire)
6261 {
6262         int i, pipe;
6263         bool reserve;
6264         struct amdgpu_ring *iring;
6265
6266         mutex_lock(&adev->gfx.pipe_reserve_mutex);
6267         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
6268         if (acquire)
6269                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6270         else
6271                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6272
6273         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6274                 /* Clear all reservations - everyone reacquires all resources */
6275                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6276                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6277                                                        true);
6278
6279                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6280                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6281                                                        true);
6282         } else {
6283                 /* Lower all pipes without a current reservation */
6284                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6285                         iring = &adev->gfx.gfx_ring[i];
6286                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
6287                                                            iring->me,
6288                                                            iring->pipe,
6289                                                            0);
6290                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6291                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6292                 }
6293
6294                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6295                         iring = &adev->gfx.compute_ring[i];
6296                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
6297                                                            iring->me,
6298                                                            iring->pipe,
6299                                                            0);
6300                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6301                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6302                 }
6303         }
6304
6305         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6306 }
6307
6308 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6309                                       struct amdgpu_ring *ring,
6310                                       bool acquire)
6311 {
6312         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6313         uint32_t queue_priority = acquire ? 0xf : 0x0;
6314
6315         mutex_lock(&adev->srbm_mutex);
6316         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6317
6318         WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6319         WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6320
6321         vi_srbm_select(adev, 0, 0, 0, 0);
6322         mutex_unlock(&adev->srbm_mutex);
6323 }
6324 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6325                                                enum drm_sched_priority priority)
6326 {
6327         struct amdgpu_device *adev = ring->adev;
6328         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6329
6330         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6331                 return;
6332
6333         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6334         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6335 }
6336
6337 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6338                                              u64 addr, u64 seq,
6339                                              unsigned flags)
6340 {
6341         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6342         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6343
6344         /* RELEASE_MEM - flush caches, send int */
6345         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6346         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6347                                  EOP_TC_ACTION_EN |
6348                                  EOP_TC_WB_ACTION_EN |
6349                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6350                                  EVENT_INDEX(5)));
6351         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6352         amdgpu_ring_write(ring, addr & 0xfffffffc);
6353         amdgpu_ring_write(ring, upper_32_bits(addr));
6354         amdgpu_ring_write(ring, lower_32_bits(seq));
6355         amdgpu_ring_write(ring, upper_32_bits(seq));
6356 }
6357
6358 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6359                                          u64 seq, unsigned int flags)
6360 {
6361         /* we only allocate 32bit for each seq wb address */
6362         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6363
6364         /* write fence seq to the "addr" */
6365         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6366         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6367                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6368         amdgpu_ring_write(ring, lower_32_bits(addr));
6369         amdgpu_ring_write(ring, upper_32_bits(addr));
6370         amdgpu_ring_write(ring, lower_32_bits(seq));
6371
6372         if (flags & AMDGPU_FENCE_FLAG_INT) {
6373                 /* set register to trigger INT */
6374                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6375                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6376                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6377                 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6378                 amdgpu_ring_write(ring, 0);
6379                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6380         }
6381 }
6382
6383 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6384 {
6385         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6386         amdgpu_ring_write(ring, 0);
6387 }
6388
6389 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6390 {
6391         uint32_t dw2 = 0;
6392
6393         if (amdgpu_sriov_vf(ring->adev))
6394                 gfx_v8_0_ring_emit_ce_meta(ring);
6395
6396         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6397         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6398                 gfx_v8_0_ring_emit_vgt_flush(ring);
6399                 /* set load_global_config & load_global_uconfig */
6400                 dw2 |= 0x8001;
6401                 /* set load_cs_sh_regs */
6402                 dw2 |= 0x01000000;
6403                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6404                 dw2 |= 0x10002;
6405
6406                 /* set load_ce_ram if preamble presented */
6407                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6408                         dw2 |= 0x10000000;
6409         } else {
6410                 /* still load_ce_ram if this is the first time preamble presented
6411                  * although there is no context switch happens.
6412                  */
6413                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6414                         dw2 |= 0x10000000;
6415         }
6416
6417         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6418         amdgpu_ring_write(ring, dw2);
6419         amdgpu_ring_write(ring, 0);
6420 }
6421
6422 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6423 {
6424         unsigned ret;
6425
6426         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6427         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6428         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6429         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6430         ret = ring->wptr & ring->buf_mask;
6431         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6432         return ret;
6433 }
6434
6435 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6436 {
6437         unsigned cur;
6438
6439         BUG_ON(offset > ring->buf_mask);
6440         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6441
6442         cur = (ring->wptr & ring->buf_mask) - 1;
6443         if (likely(cur > offset))
6444                 ring->ring[offset] = cur - offset;
6445         else
6446                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6447 }
6448
6449 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6450 {
6451         struct amdgpu_device *adev = ring->adev;
6452         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
6453
6454         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6455         amdgpu_ring_write(ring, 0 |     /* src: register*/
6456                                 (5 << 8) |      /* dst: memory */
6457                                 (1 << 20));     /* write confirm */
6458         amdgpu_ring_write(ring, reg);
6459         amdgpu_ring_write(ring, 0);
6460         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6461                                 kiq->reg_val_offs * 4));
6462         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6463                                 kiq->reg_val_offs * 4));
6464 }
6465
6466 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6467                                   uint32_t val)
6468 {
6469         uint32_t cmd;
6470
6471         switch (ring->funcs->type) {
6472         case AMDGPU_RING_TYPE_GFX:
6473                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6474                 break;
6475         case AMDGPU_RING_TYPE_KIQ:
6476                 cmd = 1 << 16; /* no inc addr */
6477                 break;
6478         default:
6479                 cmd = WR_CONFIRM;
6480                 break;
6481         }
6482
6483         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6484         amdgpu_ring_write(ring, cmd);
6485         amdgpu_ring_write(ring, reg);
6486         amdgpu_ring_write(ring, 0);
6487         amdgpu_ring_write(ring, val);
6488 }
6489
6490 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6491 {
6492         struct amdgpu_device *adev = ring->adev;
6493         uint32_t value = 0;
6494
6495         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6496         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6497         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6498         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6499         WREG32(mmSQ_CMD, value);
6500 }
6501
6502 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6503                                                  enum amdgpu_interrupt_state state)
6504 {
6505         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6506                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6507 }
6508
6509 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6510                                                      int me, int pipe,
6511                                                      enum amdgpu_interrupt_state state)
6512 {
6513         u32 mec_int_cntl, mec_int_cntl_reg;
6514
6515         /*
6516          * amdgpu controls only the first MEC. That's why this function only
6517          * handles the setting of interrupts for this specific MEC. All other
6518          * pipes' interrupts are set by amdkfd.
6519          */
6520
6521         if (me == 1) {
6522                 switch (pipe) {
6523                 case 0:
6524                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6525                         break;
6526                 case 1:
6527                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6528                         break;
6529                 case 2:
6530                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6531                         break;
6532                 case 3:
6533                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6534                         break;
6535                 default:
6536                         DRM_DEBUG("invalid pipe %d\n", pipe);
6537                         return;
6538                 }
6539         } else {
6540                 DRM_DEBUG("invalid me %d\n", me);
6541                 return;
6542         }
6543
6544         switch (state) {
6545         case AMDGPU_IRQ_STATE_DISABLE:
6546                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6547                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6548                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6549                 break;
6550         case AMDGPU_IRQ_STATE_ENABLE:
6551                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6552                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6553                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6554                 break;
6555         default:
6556                 break;
6557         }
6558 }
6559
6560 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6561                                              struct amdgpu_irq_src *source,
6562                                              unsigned type,
6563                                              enum amdgpu_interrupt_state state)
6564 {
6565         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6566                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6567
6568         return 0;
6569 }
6570
6571 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6572                                               struct amdgpu_irq_src *source,
6573                                               unsigned type,
6574                                               enum amdgpu_interrupt_state state)
6575 {
6576         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6577                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6578
6579         return 0;
6580 }
6581
6582 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6583                                             struct amdgpu_irq_src *src,
6584                                             unsigned type,
6585                                             enum amdgpu_interrupt_state state)
6586 {
6587         switch (type) {
6588         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6589                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6590                 break;
6591         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6592                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6593                 break;
6594         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6595                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6596                 break;
6597         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6598                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6599                 break;
6600         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6601                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6602                 break;
6603         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6604                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6605                 break;
6606         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6607                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6608                 break;
6609         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6610                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6611                 break;
6612         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6613                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6614                 break;
6615         default:
6616                 break;
6617         }
6618         return 0;
6619 }
6620
6621 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6622                                          struct amdgpu_irq_src *source,
6623                                          unsigned int type,
6624                                          enum amdgpu_interrupt_state state)
6625 {
6626         int enable_flag;
6627
6628         switch (state) {
6629         case AMDGPU_IRQ_STATE_DISABLE:
6630                 enable_flag = 0;
6631                 break;
6632
6633         case AMDGPU_IRQ_STATE_ENABLE:
6634                 enable_flag = 1;
6635                 break;
6636
6637         default:
6638                 return -EINVAL;
6639         }
6640
6641         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6642         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6643         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6644         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6645         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6646         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6647                      enable_flag);
6648         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6649                      enable_flag);
6650         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6651                      enable_flag);
6652         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6653                      enable_flag);
6654         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6655                      enable_flag);
6656         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6657                      enable_flag);
6658         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6659                      enable_flag);
6660         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6661                      enable_flag);
6662
6663         return 0;
6664 }
6665
6666 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6667                                      struct amdgpu_irq_src *source,
6668                                      unsigned int type,
6669                                      enum amdgpu_interrupt_state state)
6670 {
6671         int enable_flag;
6672
6673         switch (state) {
6674         case AMDGPU_IRQ_STATE_DISABLE:
6675                 enable_flag = 1;
6676                 break;
6677
6678         case AMDGPU_IRQ_STATE_ENABLE:
6679                 enable_flag = 0;
6680                 break;
6681
6682         default:
6683                 return -EINVAL;
6684         }
6685
6686         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6687                      enable_flag);
6688
6689         return 0;
6690 }
6691
6692 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6693                             struct amdgpu_irq_src *source,
6694                             struct amdgpu_iv_entry *entry)
6695 {
6696         int i;
6697         u8 me_id, pipe_id, queue_id;
6698         struct amdgpu_ring *ring;
6699
6700         DRM_DEBUG("IH: CP EOP\n");
6701         me_id = (entry->ring_id & 0x0c) >> 2;
6702         pipe_id = (entry->ring_id & 0x03) >> 0;
6703         queue_id = (entry->ring_id & 0x70) >> 4;
6704
6705         switch (me_id) {
6706         case 0:
6707                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6708                 break;
6709         case 1:
6710         case 2:
6711                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6712                         ring = &adev->gfx.compute_ring[i];
6713                         /* Per-queue interrupt is supported for MEC starting from VI.
6714                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6715                           */
6716                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6717                                 amdgpu_fence_process(ring);
6718                 }
6719                 break;
6720         }
6721         return 0;
6722 }
6723
6724 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6725                            struct amdgpu_iv_entry *entry)
6726 {
6727         u8 me_id, pipe_id, queue_id;
6728         struct amdgpu_ring *ring;
6729         int i;
6730
6731         me_id = (entry->ring_id & 0x0c) >> 2;
6732         pipe_id = (entry->ring_id & 0x03) >> 0;
6733         queue_id = (entry->ring_id & 0x70) >> 4;
6734
6735         switch (me_id) {
6736         case 0:
6737                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6738                 break;
6739         case 1:
6740         case 2:
6741                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6742                         ring = &adev->gfx.compute_ring[i];
6743                         if (ring->me == me_id && ring->pipe == pipe_id &&
6744                             ring->queue == queue_id)
6745                                 drm_sched_fault(&ring->sched);
6746                 }
6747                 break;
6748         }
6749 }
6750
/*
 * Privileged register fault handler: log the event, then flag a fault on the
 * ring identified by @entry.  Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");

        /* hand the offending ring to the scheduler fault path */
        gfx_v8_0_fault(adev, entry);

        return 0;
}
6759
/*
 * Privileged instruction fault handler: log the event, then flag a fault on
 * the ring identified by @entry.  Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");

        /* hand the offending ring to the scheduler fault path */
        gfx_v8_0_fault(adev, entry);

        return 0;
}
6768
/*
 * CP EDC/ECC error interrupt handler.  It only logs the event and takes no
 * recovery action.  Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
                                     struct amdgpu_irq_src *source,
                                     struct amdgpu_iv_entry *entry)
{
        /* newline-terminate the message, like the other handlers' messages */
        DRM_ERROR("CP EDC/ECC error detected.\n");
        return 0;
}
6776
6777 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6778 {
6779         u32 enc, se_id, sh_id, cu_id;
6780         char type[20];
6781         int sq_edc_source = -1;
6782
6783         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6784         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6785
6786         switch (enc) {
6787                 case 0:
6788                         DRM_INFO("SQ general purpose intr detected:"
6789                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6790                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6791                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6792                                         "wlt %d, thread_trace %d.\n",
6793                                         se_id,
6794                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6795                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6796                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6797                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6798                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6799                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6800                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6801                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6802                                         );
6803                         break;
6804                 case 1:
6805                 case 2:
6806
6807                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6808                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6809
6810                         /*
6811                          * This function can be called either directly from ISR
6812                          * or from BH in which case we can access SQ_EDC_INFO
6813                          * instance
6814                          */
6815                         if (in_task()) {
6816                                 mutex_lock(&adev->grbm_idx_mutex);
6817                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6818
6819                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6820
6821                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6822                                 mutex_unlock(&adev->grbm_idx_mutex);
6823                         }
6824
6825                         if (enc == 1)
6826                                 sprintf(type, "instruction intr");
6827                         else
6828                                 sprintf(type, "EDC/ECC error");
6829
6830                         DRM_INFO(
6831                                 "SQ %s detected: "
6832                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6833                                         "trap %s, sq_ed_info.source %s.\n",
6834                                         type, se_id, sh_id, cu_id,
6835                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6836                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6837                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6838                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6839                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6840                                 );
6841                         break;
6842                 default:
6843                         DRM_ERROR("SQ invalid encoding type\n.");
6844         }
6845 }
6846
6847 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6848 {
6849
6850         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6851         struct sq_work *sq_work = container_of(work, struct sq_work, work);
6852
6853         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6854 }
6855
6856 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6857                            struct amdgpu_irq_src *source,
6858                            struct amdgpu_iv_entry *entry)
6859 {
6860         unsigned ih_data = entry->src_data[0];
6861
6862         /*
6863          * Try to submit work so SQ_EDC_INFO can be accessed from
6864          * BH. If previous work submission hasn't finished yet
6865          * just print whatever info is possible directly from the ISR.
6866          */
6867         if (work_pending(&adev->gfx.sq_work.work)) {
6868                 gfx_v8_0_parse_sq_irq(adev, ih_data);
6869         } else {
6870                 adev->gfx.sq_work.ih_data = ih_data;
6871                 schedule_work(&adev->gfx.sq_work.work);
6872         }
6873
6874         return 0;
6875 }
6876
6877 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6878         .name = "gfx_v8_0",
6879         .early_init = gfx_v8_0_early_init,
6880         .late_init = gfx_v8_0_late_init,
6881         .sw_init = gfx_v8_0_sw_init,
6882         .sw_fini = gfx_v8_0_sw_fini,
6883         .hw_init = gfx_v8_0_hw_init,
6884         .hw_fini = gfx_v8_0_hw_fini,
6885         .suspend = gfx_v8_0_suspend,
6886         .resume = gfx_v8_0_resume,
6887         .is_idle = gfx_v8_0_is_idle,
6888         .wait_for_idle = gfx_v8_0_wait_for_idle,
6889         .check_soft_reset = gfx_v8_0_check_soft_reset,
6890         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6891         .soft_reset = gfx_v8_0_soft_reset,
6892         .post_soft_reset = gfx_v8_0_post_soft_reset,
6893         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6894         .set_powergating_state = gfx_v8_0_set_powergating_state,
6895         .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6896 };
6897
6898 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6899         .type = AMDGPU_RING_TYPE_GFX,
6900         .align_mask = 0xff,
6901         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6902         .support_64bit_ptrs = false,
6903         .get_rptr = gfx_v8_0_ring_get_rptr,
6904         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6905         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6906         .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6907                 5 +  /* COND_EXEC */
6908                 7 +  /* PIPELINE_SYNC */
6909                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6910                 12 +  /* FENCE for VM_FLUSH */
6911                 20 + /* GDS switch */
6912                 4 + /* double SWITCH_BUFFER,
6913                        the first COND_EXEC jump to the place just
6914                            prior to this double SWITCH_BUFFER  */
6915                 5 + /* COND_EXEC */
6916                 7 +      /*     HDP_flush */
6917                 4 +      /*     VGT_flush */
6918                 14 + /* CE_META */
6919                 31 + /* DE_META */
6920                 3 + /* CNTX_CTRL */
6921                 5 + /* HDP_INVL */
6922                 12 + 12 + /* FENCE x2 */
6923                 2, /* SWITCH_BUFFER */
6924         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6925         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6926         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6927         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6928         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6929         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6930         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6931         .test_ring = gfx_v8_0_ring_test_ring,
6932         .test_ib = gfx_v8_0_ring_test_ib,
6933         .insert_nop = amdgpu_ring_insert_nop,
6934         .pad_ib = amdgpu_ring_generic_pad_ib,
6935         .emit_switch_buffer = gfx_v8_ring_emit_sb,
6936         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6937         .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6938         .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6939         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6940         .soft_recovery = gfx_v8_0_ring_soft_recovery,
6941 };
6942
6943 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6944         .type = AMDGPU_RING_TYPE_COMPUTE,
6945         .align_mask = 0xff,
6946         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6947         .support_64bit_ptrs = false,
6948         .get_rptr = gfx_v8_0_ring_get_rptr,
6949         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6950         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6951         .emit_frame_size =
6952                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6953                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6954                 5 + /* hdp_invalidate */
6955                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6956                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6957                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6958         .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6959         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6960         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6961         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6962         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6963         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6964         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6965         .test_ring = gfx_v8_0_ring_test_ring,
6966         .test_ib = gfx_v8_0_ring_test_ib,
6967         .insert_nop = amdgpu_ring_insert_nop,
6968         .pad_ib = amdgpu_ring_generic_pad_ib,
6969         .set_priority = gfx_v8_0_ring_set_priority_compute,
6970         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6971 };
6972
6973 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6974         .type = AMDGPU_RING_TYPE_KIQ,
6975         .align_mask = 0xff,
6976         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6977         .support_64bit_ptrs = false,
6978         .get_rptr = gfx_v8_0_ring_get_rptr,
6979         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6980         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6981         .emit_frame_size =
6982                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6983                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6984                 5 + /* hdp_invalidate */
6985                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6986                 17 + /* gfx_v8_0_ring_emit_vm_flush */
6987                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6988         .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6989         .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6990         .test_ring = gfx_v8_0_ring_test_ring,
6991         .insert_nop = amdgpu_ring_insert_nop,
6992         .pad_ib = amdgpu_ring_generic_pad_ib,
6993         .emit_rreg = gfx_v8_0_ring_emit_rreg,
6994         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6995 };
6996
6997 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6998 {
6999         int i;
7000
7001         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7002
7003         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7004                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7005
7006         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7007                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7008 }
7009
7010 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7011         .set = gfx_v8_0_set_eop_interrupt_state,
7012         .process = gfx_v8_0_eop_irq,
7013 };
7014
7015 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7016         .set = gfx_v8_0_set_priv_reg_fault_state,
7017         .process = gfx_v8_0_priv_reg_irq,
7018 };
7019
7020 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7021         .set = gfx_v8_0_set_priv_inst_fault_state,
7022         .process = gfx_v8_0_priv_inst_irq,
7023 };
7024
7025 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7026         .set = gfx_v8_0_set_cp_ecc_int_state,
7027         .process = gfx_v8_0_cp_ecc_error_irq,
7028 };
7029
7030 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7031         .set = gfx_v8_0_set_sq_int_state,
7032         .process = gfx_v8_0_sq_irq,
7033 };
7034
7035 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7036 {
7037         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7038         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7039
7040         adev->gfx.priv_reg_irq.num_types = 1;
7041         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7042
7043         adev->gfx.priv_inst_irq.num_types = 1;
7044         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7045
7046         adev->gfx.cp_ecc_error_irq.num_types = 1;
7047         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7048
7049         adev->gfx.sq_irq.num_types = 1;
7050         adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7051 }
7052
7053 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7054 {
7055         adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7056 }
7057
7058 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7059 {
7060         /* init asci gds info */
7061         adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7062         adev->gds.gws_size = 64;
7063         adev->gds.oa_size = 16;
7064         adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7065 }
7066
7067 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7068                                                  u32 bitmap)
7069 {
7070         u32 data;
7071
7072         if (!bitmap)
7073                 return;
7074
7075         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7076         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7077
7078         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7079 }
7080
7081 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7082 {
7083         u32 data, mask;
7084
7085         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7086                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7087
7088         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7089
7090         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7091 }
7092
7093 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7094 {
7095         int i, j, k, counter, active_cu_number = 0;
7096         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7097         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7098         unsigned disable_masks[4 * 2];
7099         u32 ao_cu_num;
7100
7101         memset(cu_info, 0, sizeof(*cu_info));
7102
7103         if (adev->flags & AMD_IS_APU)
7104                 ao_cu_num = 2;
7105         else
7106                 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7107
7108         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7109
7110         mutex_lock(&adev->grbm_idx_mutex);
7111         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7112                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7113                         mask = 1;
7114                         ao_bitmap = 0;
7115                         counter = 0;
7116                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7117                         if (i < 4 && j < 2)
7118                                 gfx_v8_0_set_user_cu_inactive_bitmap(
7119                                         adev, disable_masks[i * 2 + j]);
7120                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7121                         cu_info->bitmap[i][j] = bitmap;
7122
7123                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7124                                 if (bitmap & mask) {
7125                                         if (counter < ao_cu_num)
7126                                                 ao_bitmap |= mask;
7127                                         counter ++;
7128                                 }
7129                                 mask <<= 1;
7130                         }
7131                         active_cu_number += counter;
7132                         if (i < 2 && j < 2)
7133                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7134                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7135                 }
7136         }
7137         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7138         mutex_unlock(&adev->grbm_idx_mutex);
7139
7140         cu_info->number = active_cu_number;
7141         cu_info->ao_cu_mask = ao_cu_mask;
7142         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7143         cu_info->max_waves_per_simd = 10;
7144         cu_info->max_scratch_slots_per_cu = 32;
7145         cu_info->wave_front_size = 64;
7146         cu_info->lds_size = 64;
7147 }
7148
7149 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7150 {
7151         .type = AMD_IP_BLOCK_TYPE_GFX,
7152         .major = 8,
7153         .minor = 0,
7154         .rev = 0,
7155         .funcs = &gfx_v8_0_ip_funcs,
7156 };
7157
7158 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7159 {
7160         .type = AMD_IP_BLOCK_TYPE_GFX,
7161         .major = 8,
7162         .minor = 1,
7163         .rev = 0,
7164         .funcs = &gfx_v8_0_ip_funcs,
7165 };
7166
7167 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7168 {
7169         uint64_t ce_payload_addr;
7170         int cnt_ce;
7171         union {
7172                 struct vi_ce_ib_state regular;
7173                 struct vi_ce_ib_state_chained_ib chained;
7174         } ce_payload = {};
7175
7176         if (ring->adev->virt.chained_ib_support) {
7177                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7178                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7179                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7180         } else {
7181                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7182                         offsetof(struct vi_gfx_meta_data, ce_payload);
7183                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7184         }
7185
7186         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7187         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7188                                 WRITE_DATA_DST_SEL(8) |
7189                                 WR_CONFIRM) |
7190                                 WRITE_DATA_CACHE_POLICY(0));
7191         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7192         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7193         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7194 }
7195
7196 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7197 {
7198         uint64_t de_payload_addr, gds_addr, csa_addr;
7199         int cnt_de;
7200         union {
7201                 struct vi_de_ib_state regular;
7202                 struct vi_de_ib_state_chained_ib chained;
7203         } de_payload = {};
7204
7205         csa_addr = amdgpu_csa_vaddr(ring->adev);
7206         gds_addr = csa_addr + 4096;
7207         if (ring->adev->virt.chained_ib_support) {
7208                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7209                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7210                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7211                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7212         } else {
7213                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7214                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7215                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7216                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7217         }
7218
7219         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7220         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7221                                 WRITE_DATA_DST_SEL(8) |
7222                                 WR_CONFIRM) |
7223                                 WRITE_DATA_CACHE_POLICY(0));
7224         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7225         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7226         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7227 }