/*
 * drm/amdgpu: remove set but not used variable 'mc_shared_chmap'
 *
 * Source: drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (linux-2.6-microblaze.git)
 */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "vi.h"
33 #include "vi_structs.h"
34 #include "vid.h"
35 #include "amdgpu_ucode.h"
36 #include "amdgpu_atombios.h"
37 #include "atombios_i2c.h"
38 #include "clearstate_vi.h"
39
40 #include "gmc/gmc_8_2_d.h"
41 #include "gmc/gmc_8_2_sh_mask.h"
42
43 #include "oss/oss_3_0_d.h"
44 #include "oss/oss_3_0_sh_mask.h"
45
46 #include "bif/bif_5_0_d.h"
47 #include "bif/bif_5_0_sh_mask.h"
48 #include "gca/gfx_8_0_d.h"
49 #include "gca/gfx_8_0_enum.h"
50 #include "gca/gfx_8_0_sh_mask.h"
51
52 #include "dce/dce_10_0_d.h"
53 #include "dce/dce_10_0_sh_mask.h"
54
55 #include "smu/smu_7_1_3_d.h"
56
57 #include "ivsrcid/ivsrcid_vislands30.h"
58
59 #define GFX8_NUM_GFX_RINGS     1
60 #define GFX8_MEC_HPD_SIZE 4096
61
62 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
63 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
64 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
65 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
66
67 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
68 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
69 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
70 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
71 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
72 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
73 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
74 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
75 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
76
77 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
78 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
79 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
80 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
81 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
82 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
83
84 /* BPM SERDES CMD */
85 #define SET_BPM_SERDES_CMD    1
86 #define CLE_BPM_SERDES_CMD    0
87
/*
 * BPM register indices used with the SET/CLE_BPM_SERDES_CMD commands
 * above when driving the RLC clock-gating controls over the BPM SERDES.
 */
enum {
	BPM_REG_CGLS_EN = 0,	/* enable/disable CGLS */
	BPM_REG_CGLS_ON,	/* CGLS on/off: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* set/clear the CGCG override */
	BPM_REG_MGCG_OVERRIDE,	/* set/clear the MGCG override */
	BPM_REG_FGCG_OVERRIDE,	/* set/clear the FGCG override */
	BPM_REG_FGCG_MAX	/* number of BPM registers */
};
97
98 #define RLC_FormatDirectRegListLength        14
99
100 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
101 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
103 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
104 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
105 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
108 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
110 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
111 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
114 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
116 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
117 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
118 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
121 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
123 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
124 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
125
126 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
127 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
129 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
130 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
132
133 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
143 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
144
145 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
153 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
155 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
156
157 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
165 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
166 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
167 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
168
169 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
170 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
172 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
173 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
174 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
175
176 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
177 {
178         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
179         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
180         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
181         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
182         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
183         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
184         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
185         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
186         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
187         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
188         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
189         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
190         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
191         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
192         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
193         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
194 };
195
196 static const u32 golden_settings_tonga_a11[] =
197 {
198         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
199         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
200         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
201         mmGB_GPU_ID, 0x0000000f, 0x00000000,
202         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
203         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
204         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
205         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
206         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
207         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
208         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
209         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
210         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
211         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
212         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
213         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
214 };
215
216 static const u32 tonga_golden_common_all[] =
217 {
218         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
219         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
220         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
221         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
222         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
223         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
224         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
225         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
226 };
227
228 static const u32 tonga_mgcg_cgcg_init[] =
229 {
230         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
231         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
232         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
233         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
234         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
236         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
237         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
238         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
239         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
240         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
241         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
242         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
244         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
245         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
246         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
247         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
248         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
249         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
250         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
251         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
252         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
253         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
254         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
255         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
256         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
257         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
258         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
259         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
260         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
261         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
262         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
263         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
264         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
265         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
266         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
267         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
268         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
269         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
270         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
271         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
272         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
273         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
274         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
275         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
276         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
277         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
278         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
279         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
280         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
281         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
282         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
283         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
284         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
285         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
286         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
287         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
288         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
289         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
290         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
291         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
292         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
293         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
294         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
295         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
296         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
297         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
298         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
299         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
300         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
301         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
302         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
303         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
304         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
305 };
306
307 static const u32 golden_settings_vegam_a11[] =
308 {
309         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
310         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
311         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
312         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
313         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
314         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
315         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
316         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
317         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
318         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
319         mmSQ_CONFIG, 0x07f80000, 0x01180000,
320         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
321         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
322         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
323         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
324         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
325         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
326 };
327
328 static const u32 vegam_golden_common_all[] =
329 {
330         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
331         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
332         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
333         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
334         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
335         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
336 };
337
338 static const u32 golden_settings_polaris11_a11[] =
339 {
340         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
341         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
342         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
343         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
344         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
345         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
346         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
347         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
348         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
349         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
350         mmSQ_CONFIG, 0x07f80000, 0x01180000,
351         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
352         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
353         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
354         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
355         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
356         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
357 };
358
359 static const u32 polaris11_golden_common_all[] =
360 {
361         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
362         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
363         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
364         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
365         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
366         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
367 };
368
369 static const u32 golden_settings_polaris10_a11[] =
370 {
371         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
372         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
373         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
374         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
375         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
376         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
377         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
378         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
379         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
380         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
381         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
382         mmSQ_CONFIG, 0x07f80000, 0x07180000,
383         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
384         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
385         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
386         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
387         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
388 };
389
390 static const u32 polaris10_golden_common_all[] =
391 {
392         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
393         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
394         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
395         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
396         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
397         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
398         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
399         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
400 };
401
402 static const u32 fiji_golden_common_all[] =
403 {
404         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
405         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
406         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
407         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
408         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
409         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
410         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
411         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
412         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
413         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
414 };
415
416 static const u32 golden_settings_fiji_a10[] =
417 {
418         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
419         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
420         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
421         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
422         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
423         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
424         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
425         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
426         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
427         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
428         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
429 };
430
431 static const u32 fiji_mgcg_cgcg_init[] =
432 {
433         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
434         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
435         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
436         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
440         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
442         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
444         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
452         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
453         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
455         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
456         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
458         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
459         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
460         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
461         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
462         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
463         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
464         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
465         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
466         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
467         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
468 };
469
470 static const u32 golden_settings_iceland_a11[] =
471 {
472         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
473         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
474         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
475         mmGB_GPU_ID, 0x0000000f, 0x00000000,
476         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
477         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
478         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
479         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
480         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
481         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
482         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
483         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
484         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
485         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
486         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
487         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
488 };
489
490 static const u32 iceland_golden_common_all[] =
491 {
492         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
493         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
494         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
495         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
496         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
497         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
498         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
499         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
500 };
501
502 static const u32 iceland_mgcg_cgcg_init[] =
503 {
504         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
505         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
506         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
507         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
508         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
509         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
510         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
511         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
512         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
513         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
514         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
515         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
516         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
518         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
519         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
520         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
521         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
522         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
523         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
524         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
525         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
526         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
527         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
528         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
529         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
530         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
531         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
532         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
533         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
534         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
535         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
536         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
537         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
538         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
539         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
540         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
541         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
542         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
543         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
544         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
545         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
546         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
547         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
548         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
549         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
550         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
551         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
552         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
553         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
554         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
555         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
558         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
561         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
562         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
563         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
564         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
565         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
566         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
567         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
568 };
569
570 static const u32 cz_golden_settings_a11[] =
571 {
572         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
573         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
574         mmGB_GPU_ID, 0x0000000f, 0x00000000,
575         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
576         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
577         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
578         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
579         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
580         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
581         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
582         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
583         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
584 };
585
586 static const u32 cz_golden_common_all[] =
587 {
588         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
589         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
590         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
591         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
592         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
593         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
594         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
595         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
596 };
597
598 static const u32 cz_mgcg_cgcg_init[] =
599 {
600         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
601         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
602         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
606         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
608         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
609         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
610         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
611         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
615         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
616         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
617         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
619         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
620         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
621         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
622         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
623         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
624         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
625         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
626         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
627         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
628         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
629         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
630         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
631         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
632         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
633         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
634         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
635         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
636         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
637         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
638         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
639         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
640         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
641         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
642         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
643         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
644         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
645         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
646         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
647         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
648         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
649         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
650         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
651         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
652         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
653         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
654         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
655         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
656         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
657         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
658         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
659         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
660         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
661         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
662         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
663         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
664         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
665         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
666         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
667         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
668         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
669         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
670         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
671         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
672         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
673         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
674         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
675 };
676
/* Stoney per-register "golden" overrides, stored as {register, mask, value}
 * triples and applied via amdgpu_device_program_register_sequence() from
 * gfx_v8_0_init_golden_registers(). */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
690
/* Stoney common "golden" register values ({register, mask, value} triples):
 * raster/address configuration and SPI CU resource reservations, programmed
 * after the per-register settings above. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
702
/* Stoney medium-grain / coarse-grain clockgating init values
 * ({register, mask, value} triples), programmed first in the Stoney branch
 * of gfx_v8_0_init_golden_registers(). */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
711
712
/* Human-readable descriptions of SQ EDC (error detection/correction) error
 * sources.  NOTE(review): presumably indexed by the SQ_EDC_INFO SOURCE
 * field decoded by the SQ interrupt handler — confirm against the user of
 * this table (outside this chunk). */
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
722
723 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
724 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
725 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
726 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
727 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
728 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
729 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
730 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
731
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 * @adev: amdgpu device pointer
 *
 * Applies the ASIC-specific register override tables defined above.  Each
 * branch programs, in sequence, the clockgating (mgcg/cgcg) init table where
 * one exists, then the per-register golden settings, then the common table.
 * Unknown ASIC types are silently ignored.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		/* VegaM and Polaris have no separate mgcg/cgcg init table. */
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific quirk keyed on PCI revision/subsystem IDs:
		 * issues two I2C writes to an onboard device — presumably a
		 * fan/voltage controller fixup for these specific Polaris10
		 * boards; confirm against the originating commit. */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
828
829 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
830 {
831         adev->gfx.scratch.num_reg = 8;
832         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
833         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
834 }
835
836 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
837 {
838         struct amdgpu_device *adev = ring->adev;
839         uint32_t scratch;
840         uint32_t tmp = 0;
841         unsigned i;
842         int r;
843
844         r = amdgpu_gfx_scratch_get(adev, &scratch);
845         if (r)
846                 return r;
847
848         WREG32(scratch, 0xCAFEDEAD);
849         r = amdgpu_ring_alloc(ring, 3);
850         if (r)
851                 goto error_free_scratch;
852
853         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
854         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
855         amdgpu_ring_write(ring, 0xDEADBEEF);
856         amdgpu_ring_commit(ring);
857
858         for (i = 0; i < adev->usec_timeout; i++) {
859                 tmp = RREG32(scratch);
860                 if (tmp == 0xDEADBEEF)
861                         break;
862                 udelay(1);
863         }
864
865         if (i >= adev->usec_timeout)
866                 r = -ETIMEDOUT;
867
868 error_free_scratch:
869         amdgpu_gfx_scratch_free(adev, scratch);
870         return r;
871 }
872
873 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
874 {
875         struct amdgpu_device *adev = ring->adev;
876         struct amdgpu_ib ib;
877         struct dma_fence *f = NULL;
878
879         unsigned int index;
880         uint64_t gpu_addr;
881         uint32_t tmp;
882         long r;
883
884         r = amdgpu_device_wb_get(adev, &index);
885         if (r)
886                 return r;
887
888         gpu_addr = adev->wb.gpu_addr + (index * 4);
889         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
890         memset(&ib, 0, sizeof(ib));
891         r = amdgpu_ib_get(adev, NULL, 16, &ib);
892         if (r)
893                 goto err1;
894
895         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
896         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
897         ib.ptr[2] = lower_32_bits(gpu_addr);
898         ib.ptr[3] = upper_32_bits(gpu_addr);
899         ib.ptr[4] = 0xDEADBEEF;
900         ib.length_dw = 5;
901
902         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
903         if (r)
904                 goto err2;
905
906         r = dma_fence_wait_timeout(f, false, timeout);
907         if (r == 0) {
908                 r = -ETIMEDOUT;
909                 goto err2;
910         } else if (r < 0) {
911                 goto err2;
912         }
913
914         tmp = adev->wb.wb[index];
915         if (tmp == 0xDEADBEEF)
916                 r = 0;
917         else
918                 r = -EINVAL;
919
920 err2:
921         amdgpu_ib_free(adev, &ib, NULL);
922         dma_fence_put(f);
923 err1:
924         amdgpu_device_wb_free(adev, index);
925         return r;
926 }
927
928
929 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
930 {
931         release_firmware(adev->gfx.pfp_fw);
932         adev->gfx.pfp_fw = NULL;
933         release_firmware(adev->gfx.me_fw);
934         adev->gfx.me_fw = NULL;
935         release_firmware(adev->gfx.ce_fw);
936         adev->gfx.ce_fw = NULL;
937         release_firmware(adev->gfx.rlc_fw);
938         adev->gfx.rlc_fw = NULL;
939         release_firmware(adev->gfx.mec_fw);
940         adev->gfx.mec_fw = NULL;
941         if ((adev->asic_type != CHIP_STONEY) &&
942             (adev->asic_type != CHIP_TOPAZ))
943                 release_firmware(adev->gfx.mec2_fw);
944         adev->gfx.mec2_fw = NULL;
945
946         kfree(adev->gfx.rlc.register_list_format);
947 }
948
949 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
950 {
951         const char *chip_name;
952         char fw_name[30];
953         int err;
954         struct amdgpu_firmware_info *info = NULL;
955         const struct common_firmware_header *header = NULL;
956         const struct gfx_firmware_header_v1_0 *cp_hdr;
957         const struct rlc_firmware_header_v2_0 *rlc_hdr;
958         unsigned int *tmp = NULL, i;
959
960         DRM_DEBUG("\n");
961
962         switch (adev->asic_type) {
963         case CHIP_TOPAZ:
964                 chip_name = "topaz";
965                 break;
966         case CHIP_TONGA:
967                 chip_name = "tonga";
968                 break;
969         case CHIP_CARRIZO:
970                 chip_name = "carrizo";
971                 break;
972         case CHIP_FIJI:
973                 chip_name = "fiji";
974                 break;
975         case CHIP_STONEY:
976                 chip_name = "stoney";
977                 break;
978         case CHIP_POLARIS10:
979                 chip_name = "polaris10";
980                 break;
981         case CHIP_POLARIS11:
982                 chip_name = "polaris11";
983                 break;
984         case CHIP_POLARIS12:
985                 chip_name = "polaris12";
986                 break;
987         case CHIP_VEGAM:
988                 chip_name = "vegam";
989                 break;
990         default:
991                 BUG();
992         }
993
994         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
995                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
996                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
997                 if (err == -ENOENT) {
998                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
999                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1000                 }
1001         } else {
1002                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1003                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1004         }
1005         if (err)
1006                 goto out;
1007         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1008         if (err)
1009                 goto out;
1010         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1011         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1012         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1013
1014         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1015                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1016                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1017                 if (err == -ENOENT) {
1018                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1019                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1020                 }
1021         } else {
1022                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1023                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1024         }
1025         if (err)
1026                 goto out;
1027         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1028         if (err)
1029                 goto out;
1030         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1031         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1032
1033         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1034
1035         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1036                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1037                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1038                 if (err == -ENOENT) {
1039                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1040                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1041                 }
1042         } else {
1043                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1044                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1045         }
1046         if (err)
1047                 goto out;
1048         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1049         if (err)
1050                 goto out;
1051         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1052         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1053         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1054
1055         /*
1056          * Support for MCBP/Virtualization in combination with chained IBs is
1057          * formal released on feature version #46
1058          */
1059         if (adev->gfx.ce_feature_version >= 46 &&
1060             adev->gfx.pfp_feature_version >= 46) {
1061                 adev->virt.chained_ib_support = true;
1062                 DRM_INFO("Chained IB support enabled!\n");
1063         } else
1064                 adev->virt.chained_ib_support = false;
1065
1066         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1067         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1068         if (err)
1069                 goto out;
1070         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1071         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1072         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1073         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1074
1075         adev->gfx.rlc.save_and_restore_offset =
1076                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1077         adev->gfx.rlc.clear_state_descriptor_offset =
1078                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1079         adev->gfx.rlc.avail_scratch_ram_locations =
1080                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1081         adev->gfx.rlc.reg_restore_list_size =
1082                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1083         adev->gfx.rlc.reg_list_format_start =
1084                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1085         adev->gfx.rlc.reg_list_format_separate_start =
1086                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1087         adev->gfx.rlc.starting_offsets_start =
1088                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1089         adev->gfx.rlc.reg_list_format_size_bytes =
1090                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1091         adev->gfx.rlc.reg_list_size_bytes =
1092                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1093
1094         adev->gfx.rlc.register_list_format =
1095                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1096                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1097
1098         if (!adev->gfx.rlc.register_list_format) {
1099                 err = -ENOMEM;
1100                 goto out;
1101         }
1102
1103         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1104                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1105         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1106                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1107
1108         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1109
1110         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1111                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1112         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1113                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1114
1115         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1116                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1117                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1118                 if (err == -ENOENT) {
1119                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1120                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1121                 }
1122         } else {
1123                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1124                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1125         }
1126         if (err)
1127                 goto out;
1128         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1129         if (err)
1130                 goto out;
1131         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1132         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1133         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1134
1135         if ((adev->asic_type != CHIP_STONEY) &&
1136             (adev->asic_type != CHIP_TOPAZ)) {
1137                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1138                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1139                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1140                         if (err == -ENOENT) {
1141                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1142                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1143                         }
1144                 } else {
1145                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1146                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1147                 }
1148                 if (!err) {
1149                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1150                         if (err)
1151                                 goto out;
1152                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1153                                 adev->gfx.mec2_fw->data;
1154                         adev->gfx.mec2_fw_version =
1155                                 le32_to_cpu(cp_hdr->header.ucode_version);
1156                         adev->gfx.mec2_feature_version =
1157                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1158                 } else {
1159                         err = 0;
1160                         adev->gfx.mec2_fw = NULL;
1161                 }
1162         }
1163
1164         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1165         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1166         info->fw = adev->gfx.pfp_fw;
1167         header = (const struct common_firmware_header *)info->fw->data;
1168         adev->firmware.fw_size +=
1169                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1170
1171         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1172         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1173         info->fw = adev->gfx.me_fw;
1174         header = (const struct common_firmware_header *)info->fw->data;
1175         adev->firmware.fw_size +=
1176                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1177
1178         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1179         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1180         info->fw = adev->gfx.ce_fw;
1181         header = (const struct common_firmware_header *)info->fw->data;
1182         adev->firmware.fw_size +=
1183                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1184
1185         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1186         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1187         info->fw = adev->gfx.rlc_fw;
1188         header = (const struct common_firmware_header *)info->fw->data;
1189         adev->firmware.fw_size +=
1190                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1191
1192         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1193         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1194         info->fw = adev->gfx.mec_fw;
1195         header = (const struct common_firmware_header *)info->fw->data;
1196         adev->firmware.fw_size +=
1197                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1198
1199         /* we need account JT in */
1200         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1201         adev->firmware.fw_size +=
1202                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1203
1204         if (amdgpu_sriov_vf(adev)) {
1205                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1206                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1207                 info->fw = adev->gfx.mec_fw;
1208                 adev->firmware.fw_size +=
1209                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1210         }
1211
1212         if (adev->gfx.mec2_fw) {
1213                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1214                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1215                 info->fw = adev->gfx.mec2_fw;
1216                 header = (const struct common_firmware_header *)info->fw->data;
1217                 adev->firmware.fw_size +=
1218                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1219         }
1220
1221 out:
1222         if (err) {
1223                 dev_err(adev->dev,
1224                         "gfx8: Failed to load firmware \"%s\"\n",
1225                         fw_name);
1226                 release_firmware(adev->gfx.pfp_fw);
1227                 adev->gfx.pfp_fw = NULL;
1228                 release_firmware(adev->gfx.me_fw);
1229                 adev->gfx.me_fw = NULL;
1230                 release_firmware(adev->gfx.ce_fw);
1231                 adev->gfx.ce_fw = NULL;
1232                 release_firmware(adev->gfx.rlc_fw);
1233                 adev->gfx.rlc_fw = NULL;
1234                 release_firmware(adev->gfx.mec_fw);
1235                 adev->gfx.mec_fw = NULL;
1236                 release_firmware(adev->gfx.mec2_fw);
1237                 adev->gfx.mec2_fw = NULL;
1238         }
1239         return err;
1240 }
1241
1242 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1243                                     volatile u32 *buffer)
1244 {
1245         u32 count = 0, i;
1246         const struct cs_section_def *sect = NULL;
1247         const struct cs_extent_def *ext = NULL;
1248
1249         if (adev->gfx.rlc.cs_data == NULL)
1250                 return;
1251         if (buffer == NULL)
1252                 return;
1253
1254         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1255         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1256
1257         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1258         buffer[count++] = cpu_to_le32(0x80000000);
1259         buffer[count++] = cpu_to_le32(0x80000000);
1260
1261         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1262                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1263                         if (sect->id == SECT_CONTEXT) {
1264                                 buffer[count++] =
1265                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1266                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1267                                                 PACKET3_SET_CONTEXT_REG_START);
1268                                 for (i = 0; i < ext->reg_count; i++)
1269                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1270                         } else {
1271                                 return;
1272                         }
1273                 }
1274         }
1275
1276         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1277         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1278                         PACKET3_SET_CONTEXT_REG_START);
1279         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1280         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1281
1282         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1283         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1284
1285         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1286         buffer[count++] = cpu_to_le32(0);
1287 }
1288
1289 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1290 {
1291         if (adev->asic_type == CHIP_CARRIZO)
1292                 return 5;
1293         else
1294                 return 4;
1295 }
1296
1297 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1298 {
1299         const struct cs_section_def *cs_data;
1300         int r;
1301
1302         adev->gfx.rlc.cs_data = vi_cs_data;
1303
1304         cs_data = adev->gfx.rlc.cs_data;
1305
1306         if (cs_data) {
1307                 /* init clear state block */
1308                 r = amdgpu_gfx_rlc_init_csb(adev);
1309                 if (r)
1310                         return r;
1311         }
1312
1313         if ((adev->asic_type == CHIP_CARRIZO) ||
1314             (adev->asic_type == CHIP_STONEY)) {
1315                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1316                 r = amdgpu_gfx_rlc_init_cpt(adev);
1317                 if (r)
1318                         return r;
1319         }
1320
1321         return 0;
1322 }
1323
1324 static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev)
1325 {
1326         int r;
1327
1328         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1329         if (unlikely(r != 0))
1330                 return r;
1331
1332         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1333                         AMDGPU_GEM_DOMAIN_VRAM);
1334         if (!r)
1335                 adev->gfx.rlc.clear_state_gpu_addr =
1336                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1337
1338         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1339
1340         return r;
1341 }
1342
1343 static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev)
1344 {
1345         int r;
1346
1347         if (!adev->gfx.rlc.clear_state_obj)
1348                 return;
1349
1350         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1351         if (likely(r == 0)) {
1352                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1353                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1354         }
1355 }
1356
/* Free the MEC HPD EOP buffer object allocated by gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1361
1362 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1363 {
1364         int r;
1365         u32 *hpd;
1366         size_t mec_hpd_size;
1367
1368         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1369
1370         /* take ownership of the relevant compute queues */
1371         amdgpu_gfx_compute_queue_acquire(adev);
1372
1373         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1374
1375         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1376                                       AMDGPU_GEM_DOMAIN_VRAM,
1377                                       &adev->gfx.mec.hpd_eop_obj,
1378                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1379                                       (void **)&hpd);
1380         if (r) {
1381                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1382                 return r;
1383         }
1384
1385         memset(hpd, 0, mec_hpd_size);
1386
1387         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1388         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1389
1390         return 0;
1391 }
1392
/*
 * Raw GCN machine code dispatched by gfx_v8_0_do_edc_gpr_workarounds()
 * to initialize the VGPRs as part of the Carrizo EDC workaround.
 * (Presumably a run of v_mov_b32 writes followed by barrier/endpgm —
 * regenerate from assembly rather than editing these words by hand.)
 */
static const u32 vgpr_init_compute_shader[] =
{
        0x7e000209, 0x7e020208,
        0x7e040207, 0x7e060206,
        0x7e080205, 0x7e0a0204,
        0x7e0c0203, 0x7e0e0202,
        0x7e100201, 0x7e120200,
        0x7e140209, 0x7e160208,
        0x7e180207, 0x7e1a0206,
        0x7e1c0205, 0x7e1e0204,
        0x7e200203, 0x7e220202,
        0x7e240201, 0x7e260200,
        0x7e280209, 0x7e2a0208,
        0x7e2c0207, 0x7e2e0206,
        0x7e300205, 0x7e320204,
        0x7e340203, 0x7e360202,
        0x7e380201, 0x7e3a0200,
        0x7e3c0209, 0x7e3e0208,
        0x7e400207, 0x7e420206,
        0x7e440205, 0x7e460204,
        0x7e480203, 0x7e4a0202,
        0x7e4c0201, 0x7e4e0200,
        0x7e500209, 0x7e520208,
        0x7e540207, 0x7e560206,
        0x7e580205, 0x7e5a0204,
        0x7e5c0203, 0x7e5e0202,
        0x7e600201, 0x7e620200,
        0x7e640209, 0x7e660208,
        0x7e680207, 0x7e6a0206,
        0x7e6c0205, 0x7e6e0204,
        0x7e700203, 0x7e720202,
        0x7e740201, 0x7e760200,
        0x7e780209, 0x7e7a0208,
        0x7e7c0207, 0x7e7e0206,
        0xbf8a0000, 0xbf810000,
};
1429
/*
 * Raw GCN machine code dispatched (twice, with different CU masks) by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize the SGPRs as part of
 * the Carrizo EDC workaround. Regenerate from assembly rather than
 * editing these words by hand.
 */
static const u32 sgpr_init_compute_shader[] =
{
        0xbe8a0100, 0xbe8c0102,
        0xbe8e0104, 0xbe900106,
        0xbe920108, 0xbe940100,
        0xbe960102, 0xbe980104,
        0xbe9a0106, 0xbe9c0108,
        0xbe9e0100, 0xbea00102,
        0xbea20104, 0xbea40106,
        0xbea60108, 0xbea80100,
        0xbeaa0102, 0xbeac0104,
        0xbeae0106, 0xbeb00108,
        0xbeb20100, 0xbeb40102,
        0xbeb60104, 0xbeb80106,
        0xbeba0108, 0xbebc0100,
        0xbebe0102, 0xbec00104,
        0xbec20106, 0xbec40108,
        0xbec60100, 0xbec80102,
        0xbee60004, 0xbee70005,
        0xbeea0006, 0xbeeb0007,
        0xbee80008, 0xbee90009,
        0xbefc0000, 0xbf8a0000,
        0xbf810000, 0x00000000,
};
1454
/*
 * Register/value pairs programmed ahead of the VGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); consumed two entries at a time
 * (offset, value).
 */
static const u32 vgpr_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*4,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1475
/*
 * Register/value pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds() (CU mask 0x0f, i.e. the lower CUs);
 * consumed two entries at a time (offset, value).
 */
static const u32 sgpr1_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1496
/*
 * Register/value pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds() (CU mask 0xf0, i.e. the upper CUs);
 * consumed two entries at a time (offset, value).
 */
static const u32 sgpr2_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1517
/*
 * SEC/DED EDC counter registers that gfx_v8_0_do_edc_gpr_workarounds()
 * reads back at the end of the workaround to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
        mmCPC_EDC_ATC_CNT,
        mmCPC_EDC_SCRATCH_CNT,
        mmCPC_EDC_UCODE_CNT,
        mmCPF_EDC_ATC_CNT,
        mmCPF_EDC_ROQ_CNT,
        mmCPF_EDC_TAG_CNT,
        mmCPG_EDC_ATC_CNT,
        mmCPG_EDC_DMA_CNT,
        mmCPG_EDC_TAG_CNT,
        mmDC_EDC_CSINVOC_CNT,
        mmDC_EDC_RESTORE_CNT,
        mmDC_EDC_STATE_CNT,
        mmGDS_EDC_CNT,
        mmGDS_EDC_GRBM_CNT,
        mmGDS_EDC_OA_DED,
        mmSPI_EDC_CNT,
        mmSQC_ATC_EDC_GATCL1_CNT,
        mmSQC_EDC_CNT,
        mmSQ_EDC_DED_CNT,
        mmSQ_EDC_INFO,
        mmSQ_EDC_SEC_CNT,
        mmTCC_EDC_CNT,
        mmTCP_ATC_EDC_GATCL1_CNT,
        mmTCP_EDC_CNT,
        mmTD_EDC_CNT
};
1546
1547 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1548 {
1549         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1550         struct amdgpu_ib ib;
1551         struct dma_fence *f = NULL;
1552         int r, i;
1553         u32 tmp;
1554         unsigned total_size, vgpr_offset, sgpr_offset;
1555         u64 gpu_addr;
1556
1557         /* only supported on CZ */
1558         if (adev->asic_type != CHIP_CARRIZO)
1559                 return 0;
1560
1561         /* bail if the compute ring is not ready */
1562         if (!ring->sched.ready)
1563                 return 0;
1564
1565         tmp = RREG32(mmGB_EDC_MODE);
1566         WREG32(mmGB_EDC_MODE, 0);
1567
1568         total_size =
1569                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1570         total_size +=
1571                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1572         total_size +=
1573                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1574         total_size = ALIGN(total_size, 256);
1575         vgpr_offset = total_size;
1576         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1577         sgpr_offset = total_size;
1578         total_size += sizeof(sgpr_init_compute_shader);
1579
1580         /* allocate an indirect buffer to put the commands in */
1581         memset(&ib, 0, sizeof(ib));
1582         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1583         if (r) {
1584                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1585                 return r;
1586         }
1587
1588         /* load the compute shaders */
1589         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1590                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1591
1592         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1593                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1594
1595         /* init the ib length to 0 */
1596         ib.length_dw = 0;
1597
1598         /* VGPR */
1599         /* write the register state for the compute dispatch */
1600         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1601                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1602                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1603                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1604         }
1605         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1606         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1607         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1608         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1609         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1610         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1611
1612         /* write dispatch packet */
1613         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1614         ib.ptr[ib.length_dw++] = 8; /* x */
1615         ib.ptr[ib.length_dw++] = 1; /* y */
1616         ib.ptr[ib.length_dw++] = 1; /* z */
1617         ib.ptr[ib.length_dw++] =
1618                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1619
1620         /* write CS partial flush packet */
1621         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1622         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1623
1624         /* SGPR1 */
1625         /* write the register state for the compute dispatch */
1626         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1627                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1628                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1629                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1630         }
1631         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1632         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1633         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1634         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1635         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1636         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1637
1638         /* write dispatch packet */
1639         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1640         ib.ptr[ib.length_dw++] = 8; /* x */
1641         ib.ptr[ib.length_dw++] = 1; /* y */
1642         ib.ptr[ib.length_dw++] = 1; /* z */
1643         ib.ptr[ib.length_dw++] =
1644                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1645
1646         /* write CS partial flush packet */
1647         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1648         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1649
1650         /* SGPR2 */
1651         /* write the register state for the compute dispatch */
1652         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1653                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1654                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1655                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1656         }
1657         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1658         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1659         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1660         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1661         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1662         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1663
1664         /* write dispatch packet */
1665         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1666         ib.ptr[ib.length_dw++] = 8; /* x */
1667         ib.ptr[ib.length_dw++] = 1; /* y */
1668         ib.ptr[ib.length_dw++] = 1; /* z */
1669         ib.ptr[ib.length_dw++] =
1670                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1671
1672         /* write CS partial flush packet */
1673         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1674         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1675
1676         /* shedule the ib on the ring */
1677         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1678         if (r) {
1679                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1680                 goto fail;
1681         }
1682
1683         /* wait for the GPU to finish processing the IB */
1684         r = dma_fence_wait(f, false);
1685         if (r) {
1686                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1687                 goto fail;
1688         }
1689
1690         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1691         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1692         WREG32(mmGB_EDC_MODE, tmp);
1693
1694         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1695         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1696         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1697
1698
1699         /* read back registers to clear the counters */
1700         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1701                 RREG32(sec_ded_counter_registers[i]);
1702
1703 fail:
1704         amdgpu_ib_free(adev, &ib, NULL);
1705         dma_fence_put(f);
1706
1707         return r;
1708 }
1709
/*
 * gfx_v8_0_gpu_early_init - derive the static GFX configuration for the ASIC
 *
 * Fills adev->gfx.config with per-ASIC shader-engine/pipe/backend limits
 * and FIFO sizes, reads the MC arbitration config to work out the DRAM
 * row size, and computes the final GB_ADDR_CONFIG value.
 *
 * Returns 0 on success, or the amdgpu_atombios_get_gfx_info() error on
 * the Polaris/VegaM parts that pull their topology from the vbios.
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
        u32 gb_addr_config;
        u32 mc_arb_ramcfg;
        u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
        u32 tmp;
        int ret;

        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                adev->gfx.config.max_shader_engines = 1;
                adev->gfx.config.max_tile_pipes = 2;
                adev->gfx.config.max_cu_per_sh = 6;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_texture_channel_caches = 2;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_FIJI:
                adev->gfx.config.max_shader_engines = 4;
                adev->gfx.config.max_tile_pipes = 16;
                adev->gfx.config.max_cu_per_sh = 16;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 4;
                adev->gfx.config.max_texture_channel_caches = 16;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                /* SE/CU topology comes from the vbios on these parts */
                ret = amdgpu_atombios_get_gfx_info(adev);
                if (ret)
                        return ret;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_POLARIS10:
        case CHIP_VEGAM:
                /* SE/CU topology comes from the vbios on these parts */
                ret = amdgpu_atombios_get_gfx_info(adev);
                if (ret)
                        return ret;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_TONGA:
                adev->gfx.config.max_shader_engines = 4;
                adev->gfx.config.max_tile_pipes = 8;
                adev->gfx.config.max_cu_per_sh = 8;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_texture_channel_caches = 8;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_CARRIZO:
                adev->gfx.config.max_shader_engines = 1;
                adev->gfx.config.max_tile_pipes = 2;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_cu_per_sh = 8;
                adev->gfx.config.max_texture_channel_caches = 2;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_STONEY:
                adev->gfx.config.max_shader_engines = 1;
                adev->gfx.config.max_tile_pipes = 2;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 1;
                adev->gfx.config.max_cu_per_sh = 3;
                adev->gfx.config.max_texture_channel_caches = 2;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 16;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
                break;
        default:
                adev->gfx.config.max_shader_engines = 2;
                adev->gfx.config.max_tile_pipes = 4;
                adev->gfx.config.max_cu_per_sh = 2;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_texture_channel_caches = 4;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        }

        /* cache the raw MC arbitration config for later consumers */
        adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
        mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

        adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
        adev->gfx.config.mem_max_burst_length_bytes = 256;
        if (adev->flags & AMD_IS_APU) {
                /* Get memory bank mapping mode. */
                tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
                dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
                dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

                tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
                dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
                dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

                /* Validate settings in case only one DIMM installed. */
                if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
                        dimm00_addr_map = 0;
                if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
                        dimm01_addr_map = 0;
                if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
                        dimm10_addr_map = 0;
                if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
                        dimm11_addr_map = 0;

                /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
                /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
                if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
                        adev->gfx.config.mem_row_size_in_kb = 2;
                else
                        adev->gfx.config.mem_row_size_in_kb = 1;
        } else {
                /* dGPU: derive row size from the MC column count, capped at 4KB */
                tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
                adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
                if (adev->gfx.config.mem_row_size_in_kb > 4)
                        adev->gfx.config.mem_row_size_in_kb = 4;
        }

        adev->gfx.config.shader_engine_tile_size = 32;
        adev->gfx.config.num_gpus = 1;
        adev->gfx.config.multi_gpu_tile_size = 64;

        /* fix up row size */
        switch (adev->gfx.config.mem_row_size_in_kb) {
        case 1:
        default:
                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
                break;
        case 2:
                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
                break;
        case 4:
                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
                break;
        }
        adev->gfx.config.gb_addr_config = gb_addr_config;

        return 0;
}
1912
1913 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1914                                         int mec, int pipe, int queue)
1915 {
1916         int r;
1917         unsigned irq_type;
1918         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1919
1920         ring = &adev->gfx.compute_ring[ring_id];
1921
1922         /* mec0 is me1 */
1923         ring->me = mec + 1;
1924         ring->pipe = pipe;
1925         ring->queue = queue;
1926
1927         ring->ring_obj = NULL;
1928         ring->use_doorbell = true;
1929         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1930         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1931                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1932         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1933
1934         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1935                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1936                 + ring->pipe;
1937
1938         /* type-2 packets are deprecated on MEC, use type-3 instead */
1939         r = amdgpu_ring_init(adev, ring, 1024,
1940                         &adev->gfx.eop_irq, irq_type);
1941         if (r)
1942                 return r;
1943
1944
1945         return 0;
1946 }
1947
1948 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1949
1950 static int gfx_v8_0_sw_init(void *handle)
1951 {
1952         int i, j, k, r, ring_id;
1953         struct amdgpu_ring *ring;
1954         struct amdgpu_kiq *kiq;
1955         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1956
1957         switch (adev->asic_type) {
1958         case CHIP_TONGA:
1959         case CHIP_CARRIZO:
1960         case CHIP_FIJI:
1961         case CHIP_POLARIS10:
1962         case CHIP_POLARIS11:
1963         case CHIP_POLARIS12:
1964         case CHIP_VEGAM:
1965                 adev->gfx.mec.num_mec = 2;
1966                 break;
1967         case CHIP_TOPAZ:
1968         case CHIP_STONEY:
1969         default:
1970                 adev->gfx.mec.num_mec = 1;
1971                 break;
1972         }
1973
1974         adev->gfx.mec.num_pipe_per_mec = 4;
1975         adev->gfx.mec.num_queue_per_pipe = 8;
1976
1977         /* EOP Event */
1978         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1979         if (r)
1980                 return r;
1981
1982         /* Privileged reg */
1983         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1984                               &adev->gfx.priv_reg_irq);
1985         if (r)
1986                 return r;
1987
1988         /* Privileged inst */
1989         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1990                               &adev->gfx.priv_inst_irq);
1991         if (r)
1992                 return r;
1993
1994         /* Add CP EDC/ECC irq  */
1995         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1996                               &adev->gfx.cp_ecc_error_irq);
1997         if (r)
1998                 return r;
1999
2000         /* SQ interrupts. */
2001         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
2002                               &adev->gfx.sq_irq);
2003         if (r) {
2004                 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
2005                 return r;
2006         }
2007
2008         INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
2009
2010         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2011
2012         gfx_v8_0_scratch_init(adev);
2013
2014         r = gfx_v8_0_init_microcode(adev);
2015         if (r) {
2016                 DRM_ERROR("Failed to load gfx firmware!\n");
2017                 return r;
2018         }
2019
2020         r = adev->gfx.rlc.funcs->init(adev);
2021         if (r) {
2022                 DRM_ERROR("Failed to init rlc BOs!\n");
2023                 return r;
2024         }
2025
2026         r = gfx_v8_0_mec_init(adev);
2027         if (r) {
2028                 DRM_ERROR("Failed to init MEC BOs!\n");
2029                 return r;
2030         }
2031
2032         /* set up the gfx ring */
2033         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2034                 ring = &adev->gfx.gfx_ring[i];
2035                 ring->ring_obj = NULL;
2036                 sprintf(ring->name, "gfx");
2037                 /* no gfx doorbells on iceland */
2038                 if (adev->asic_type != CHIP_TOPAZ) {
2039                         ring->use_doorbell = true;
2040                         ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2041                 }
2042
2043                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2044                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2045                 if (r)
2046                         return r;
2047         }
2048
2049
2050         /* set up the compute queues - allocate horizontally across pipes */
2051         ring_id = 0;
2052         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2053                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2054                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2055                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2056                                         continue;
2057
2058                                 r = gfx_v8_0_compute_ring_init(adev,
2059                                                                 ring_id,
2060                                                                 i, k, j);
2061                                 if (r)
2062                                         return r;
2063
2064                                 ring_id++;
2065                         }
2066                 }
2067         }
2068
2069         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2070         if (r) {
2071                 DRM_ERROR("Failed to init KIQ BOs!\n");
2072                 return r;
2073         }
2074
2075         kiq = &adev->gfx.kiq;
2076         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2077         if (r)
2078                 return r;
2079
2080         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2081         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2082         if (r)
2083                 return r;
2084
2085         adev->gfx.ce_ram_size = 0x8000;
2086
2087         r = gfx_v8_0_gpu_early_init(adev);
2088         if (r)
2089                 return r;
2090
2091         return 0;
2092 }
2093
/*
 * gfx_v8_0_sw_fini - tear down GFX v8 software state.
 *
 * Mirrors sw_init in reverse: frees the gfx and compute rings, the
 * per-queue MQD backing store, the KIQ ring and KIQ state, the MEC HPD
 * buffer, the RLC state, the clear-state buffer object, the optional
 * CP table buffer object, and finally releases the firmware images.
 *
 * @handle: amdgpu_device pointer cast to void * (IP-block callback ABI)
 *
 * Returns 0 (unconditionally; none of the teardown helpers here report
 * failure to this caller).
 */
2094 static int gfx_v8_0_sw_fini(void *handle)
2095 {
2096         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2097         int i;
2098
             /* Release ring buffers before the MQD/KIQ state that backs them. */
2099         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2100                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2101         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2102                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2103
2104         amdgpu_gfx_mqd_sw_fini(adev);
2105         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2106         amdgpu_gfx_kiq_fini(adev);
2107
2108         gfx_v8_0_mec_fini(adev);
2109         amdgpu_gfx_rlc_fini(adev);
             /* Free the RLC clear-state BO allocated by the rlc init callback. */
2110         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2111                                 &adev->gfx.rlc.clear_state_gpu_addr,
2112                                 (void **)&adev->gfx.rlc.cs_ptr);
             /* The CP table BO exists only on Carrizo/Stoney (APUs). */
2113         if ((adev->asic_type == CHIP_CARRIZO) ||
2114             (adev->asic_type == CHIP_STONEY)) {
2115                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2116                                 &adev->gfx.rlc.cp_table_gpu_addr,
2117                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2118         }
2119         gfx_v8_0_free_microcode(adev);
2120
2121         return 0;
2122 }
2123
2124 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2125 {
2126         uint32_t *modearray, *mod2array;
2127         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2128         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2129         u32 reg_offset;
2130
2131         modearray = adev->gfx.config.tile_mode_array;
2132         mod2array = adev->gfx.config.macrotile_mode_array;
2133
2134         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2135                 modearray[reg_offset] = 0;
2136
2137         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2138                 mod2array[reg_offset] = 0;
2139
2140         switch (adev->asic_type) {
2141         case CHIP_TOPAZ:
2142                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2143                                 PIPE_CONFIG(ADDR_SURF_P2) |
2144                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2145                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2146                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2147                                 PIPE_CONFIG(ADDR_SURF_P2) |
2148                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2149                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2150                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2151                                 PIPE_CONFIG(ADDR_SURF_P2) |
2152                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2153                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2154                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2155                                 PIPE_CONFIG(ADDR_SURF_P2) |
2156                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2157                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2158                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2159                                 PIPE_CONFIG(ADDR_SURF_P2) |
2160                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2161                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2162                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2163                                 PIPE_CONFIG(ADDR_SURF_P2) |
2164                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2165                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2166                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2167                                 PIPE_CONFIG(ADDR_SURF_P2) |
2168                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2169                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2170                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2171                                 PIPE_CONFIG(ADDR_SURF_P2));
2172                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2173                                 PIPE_CONFIG(ADDR_SURF_P2) |
2174                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2175                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2176                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2177                                  PIPE_CONFIG(ADDR_SURF_P2) |
2178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2180                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2181                                  PIPE_CONFIG(ADDR_SURF_P2) |
2182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2184                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2185                                  PIPE_CONFIG(ADDR_SURF_P2) |
2186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2188                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2189                                  PIPE_CONFIG(ADDR_SURF_P2) |
2190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2192                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2193                                  PIPE_CONFIG(ADDR_SURF_P2) |
2194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2196                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2197                                  PIPE_CONFIG(ADDR_SURF_P2) |
2198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2200                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2201                                  PIPE_CONFIG(ADDR_SURF_P2) |
2202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2204                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2205                                  PIPE_CONFIG(ADDR_SURF_P2) |
2206                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2207                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2208                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2209                                  PIPE_CONFIG(ADDR_SURF_P2) |
2210                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2211                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2212                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2213                                  PIPE_CONFIG(ADDR_SURF_P2) |
2214                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2215                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2216                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2217                                  PIPE_CONFIG(ADDR_SURF_P2) |
2218                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2219                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2220                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2221                                  PIPE_CONFIG(ADDR_SURF_P2) |
2222                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2223                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2224                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2225                                  PIPE_CONFIG(ADDR_SURF_P2) |
2226                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2227                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2228                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2229                                  PIPE_CONFIG(ADDR_SURF_P2) |
2230                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2231                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2232                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2233                                  PIPE_CONFIG(ADDR_SURF_P2) |
2234                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2235                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2236                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237                                  PIPE_CONFIG(ADDR_SURF_P2) |
2238                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2239                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2240                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2241                                  PIPE_CONFIG(ADDR_SURF_P2) |
2242                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2243                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2244
2245                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2246                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2247                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2248                                 NUM_BANKS(ADDR_SURF_8_BANK));
2249                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2250                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2251                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2252                                 NUM_BANKS(ADDR_SURF_8_BANK));
2253                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2254                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2255                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2256                                 NUM_BANKS(ADDR_SURF_8_BANK));
2257                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2258                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2259                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2260                                 NUM_BANKS(ADDR_SURF_8_BANK));
2261                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2263                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264                                 NUM_BANKS(ADDR_SURF_8_BANK));
2265                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2267                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2268                                 NUM_BANKS(ADDR_SURF_8_BANK));
2269                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2270                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2271                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2272                                 NUM_BANKS(ADDR_SURF_8_BANK));
2273                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2274                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2275                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2276                                 NUM_BANKS(ADDR_SURF_16_BANK));
2277                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2278                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2279                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2280                                 NUM_BANKS(ADDR_SURF_16_BANK));
2281                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2282                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2283                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2284                                  NUM_BANKS(ADDR_SURF_16_BANK));
2285                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2286                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2287                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288                                  NUM_BANKS(ADDR_SURF_16_BANK));
2289                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2290                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2291                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2292                                  NUM_BANKS(ADDR_SURF_16_BANK));
2293                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2294                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2295                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2296                                  NUM_BANKS(ADDR_SURF_16_BANK));
2297                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2299                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2300                                  NUM_BANKS(ADDR_SURF_8_BANK));
2301
2302                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2303                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2304                             reg_offset != 23)
2305                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2306
2307                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2308                         if (reg_offset != 7)
2309                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2310
2311                 break;
2312         case CHIP_FIJI:
2313         case CHIP_VEGAM:
2314                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2317                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2318                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2321                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2325                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2326                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2329                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2330                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2333                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2334                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2335                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2337                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2338                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2339                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2341                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2342                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2343                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2344                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2345                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2346                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2347                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2348                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2349                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2351                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2352                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2353                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2356                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2357                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2360                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2361                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2364                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2365                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2368                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2369                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2372                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2373                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2376                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2377                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2380                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2381                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2382                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2383                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2384                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2385                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2387                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2389                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2391                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2393                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2397                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2399                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2400                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2401                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2403                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2404                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2405                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2406                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2407                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2408                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2409                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2411                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2412                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2413                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2415                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2416                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2417                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2419                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2420                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2421                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2423                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2424                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2425                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2427                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2429                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2431                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2432                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2433                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2434                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2436
2437                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2439                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2440                                 NUM_BANKS(ADDR_SURF_8_BANK));
2441                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2443                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444                                 NUM_BANKS(ADDR_SURF_8_BANK));
2445                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448                                 NUM_BANKS(ADDR_SURF_8_BANK));
2449                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452                                 NUM_BANKS(ADDR_SURF_8_BANK));
2453                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2455                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456                                 NUM_BANKS(ADDR_SURF_8_BANK));
2457                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460                                 NUM_BANKS(ADDR_SURF_8_BANK));
2461                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464                                 NUM_BANKS(ADDR_SURF_8_BANK));
2465                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2467                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2468                                 NUM_BANKS(ADDR_SURF_8_BANK));
2469                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2471                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2472                                 NUM_BANKS(ADDR_SURF_8_BANK));
2473                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2475                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476                                  NUM_BANKS(ADDR_SURF_8_BANK));
2477                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2479                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2480                                  NUM_BANKS(ADDR_SURF_8_BANK));
2481                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2483                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2484                                  NUM_BANKS(ADDR_SURF_8_BANK));
2485                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2488                                  NUM_BANKS(ADDR_SURF_8_BANK));
2489                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492                                  NUM_BANKS(ADDR_SURF_4_BANK));
2493
2494                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2495                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2496
2497                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2498                         if (reg_offset != 7)
2499                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2500
2501                 break;
2502         case CHIP_TONGA:
                /*
                 * Tonga tiling tables.  modearray[0..30] are the values
                 * programmed into GB_TILE_MODE0..30 below; every entry here
                 * uses the 8-pipe ADDR_SURF_P8_32x32_16x16 pipe config except
                 * the PRT entries at [7], [12], [17], [23] and [30], which
                 * fall back to ADDR_SURF_P4_16x16.
                 */
2503                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2506                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2507                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2508                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2510                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2514                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2515                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2518                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2519                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2520                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2522                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2524                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2525                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2526                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2527                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2528                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2529                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2530                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2531                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2532                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2533                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2534                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2535                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2536                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2537                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2538                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2540                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2541                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2544                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2545                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2546                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2548                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2549                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2550                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2551                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2552                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2553                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2554                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2556                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2558                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2560                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2561                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2562                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2564                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2565                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2566                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2569                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2570                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2571                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2572                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2573                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2574                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2576                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2578                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2580                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2582                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2586                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2588                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2589                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2590                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2592                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2593                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2594                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2595                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2596                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2597                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2598                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2600                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2601                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2602                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2604                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2605                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2606                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2608                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2609                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2610                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2612                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2613                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2616                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2617                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2618                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2620                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2621                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2622                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2623                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2624                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2625
                /*
                 * Macrotile (bank layout) table for GB_MACROTILE_MODE0..14.
                 * mod2array[7] is intentionally never initialized: the write
                 * loop below skips reg_offset 7, so that slot is unused.
                 */
2626                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2628                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2629                                 NUM_BANKS(ADDR_SURF_16_BANK));
2630                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2632                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2633                                 NUM_BANKS(ADDR_SURF_16_BANK));
2634                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2636                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2637                                 NUM_BANKS(ADDR_SURF_16_BANK));
2638                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2640                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2641                                 NUM_BANKS(ADDR_SURF_16_BANK));
2642                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2644                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2645                                 NUM_BANKS(ADDR_SURF_16_BANK));
2646                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2648                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2649                                 NUM_BANKS(ADDR_SURF_16_BANK));
2650                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2652                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2653                                 NUM_BANKS(ADDR_SURF_16_BANK));
2654                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2656                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2657                                 NUM_BANKS(ADDR_SURF_16_BANK));
2658                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2660                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2661                                 NUM_BANKS(ADDR_SURF_16_BANK));
2662                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2663                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2664                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2665                                  NUM_BANKS(ADDR_SURF_16_BANK));
2666                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2668                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2669                                  NUM_BANKS(ADDR_SURF_16_BANK));
2670                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2672                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2673                                  NUM_BANKS(ADDR_SURF_8_BANK));
2674                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2676                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2677                                  NUM_BANKS(ADDR_SURF_4_BANK));
2678                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2681                                  NUM_BANKS(ADDR_SURF_4_BANK));
2682
                /* Commit both tables to the hardware registers. */
2683                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2684                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2685
                /* Skip offset 7 — that macrotile slot is not programmed. */
2686                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2687                         if (reg_offset != 7)
2688                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2689
2690                 break;
2691         case CHIP_POLARIS11:
2692         case CHIP_POLARIS12:
                /*
                 * Polaris11/Polaris12 tiling tables.  Unlike the 8-pipe parts,
                 * every entry here uses the 4-pipe ADDR_SURF_P4_16x16 pipe
                 * config; the tile-split/micro-tile/sample-split layout of
                 * each entry otherwise mirrors the other VI cases.
                 */
2693                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2696                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2697                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2700                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2701                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2704                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2705                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2708                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2709                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2710                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2712                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2713                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2714                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2716                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2717                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2718                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2720                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2721                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2722                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2724                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2725                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2726                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2727                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2728                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2730                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2734                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2736                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2738                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2739                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2740                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2742                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2743                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2744                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2746                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2747                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2748                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2750                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2751                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2752                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2754                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2755                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2756                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2759                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2760                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2762                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2763                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2764                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2766                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2768                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2770                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2771                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2772                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2774                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2776                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2778                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2779                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2780                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2782                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2783                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2784                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2786                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2787                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2788                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2791                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2792                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2794                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2795                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2796                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2798                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2799                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2800                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2802                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2803                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2806                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2810                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2811                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2812                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2814                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2815
                /*
                 * Macrotile (bank layout) table for GB_MACROTILE_MODE0..14.
                 * mod2array[7] is intentionally never initialized: the write
                 * loop below skips reg_offset 7, so that slot is unused.
                 */
2816                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2818                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2819                                 NUM_BANKS(ADDR_SURF_16_BANK));
2820
2821                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2823                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824                                 NUM_BANKS(ADDR_SURF_16_BANK));
2825
2826                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2828                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2829                                 NUM_BANKS(ADDR_SURF_16_BANK));
2830
2831                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2833                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2834                                 NUM_BANKS(ADDR_SURF_16_BANK));
2835
2836                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2838                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2839                                 NUM_BANKS(ADDR_SURF_16_BANK));
2840
2841                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2844                                 NUM_BANKS(ADDR_SURF_16_BANK));
2845
2846                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2847                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2848                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2849                                 NUM_BANKS(ADDR_SURF_16_BANK));
2850
2851                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2852                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2853                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2854                                 NUM_BANKS(ADDR_SURF_16_BANK));
2855
2856                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2857                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2858                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2859                                 NUM_BANKS(ADDR_SURF_16_BANK));
2860
2861                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2863                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2864                                 NUM_BANKS(ADDR_SURF_16_BANK));
2865
2866                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2867                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2868                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2869                                 NUM_BANKS(ADDR_SURF_16_BANK));
2870
2871                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2872                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2873                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2874                                 NUM_BANKS(ADDR_SURF_16_BANK));
2875
2876                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879                                 NUM_BANKS(ADDR_SURF_8_BANK));
2880
2881                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2882                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2883                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2884                                 NUM_BANKS(ADDR_SURF_4_BANK));
2885
                /* Commit both tables to the hardware registers. */
2886                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2887                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2888
                /* Skip offset 7 — that macrotile slot is not programmed. */
2889                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2890                         if (reg_offset != 7)
2891                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2892
2893                 break;
2894         case CHIP_POLARIS10:
2895                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2900                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2906                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2907                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2908                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2916                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2917                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2920                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2921                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2922                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2923                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2925                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2926                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2927                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2928                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2929                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2932                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2934                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2936                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2937                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2938                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2940                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2941                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2942                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2943                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2944                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2945                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2946                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2948                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2950                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2952                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2953                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2954                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2956                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2957                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2958                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2961                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2962                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2963                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2964                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2965                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2966                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2968                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2970                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2974                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2976                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2978                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2980                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2982                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2984                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2986                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2987                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2990                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2992                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2994                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2996                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2997                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2998                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3000                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3001                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3002                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3004                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3005                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3007                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3009                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3010                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3012                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3013                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3014                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3015                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3016                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3017
3018                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3019                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3020                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3021                                 NUM_BANKS(ADDR_SURF_16_BANK));
3022
3023                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3025                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026                                 NUM_BANKS(ADDR_SURF_16_BANK));
3027
3028                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3030                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031                                 NUM_BANKS(ADDR_SURF_16_BANK));
3032
3033                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3035                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3036                                 NUM_BANKS(ADDR_SURF_16_BANK));
3037
3038                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3040                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3041                                 NUM_BANKS(ADDR_SURF_16_BANK));
3042
3043                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3045                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3046                                 NUM_BANKS(ADDR_SURF_16_BANK));
3047
3048                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3049                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3050                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3051                                 NUM_BANKS(ADDR_SURF_16_BANK));
3052
3053                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3054                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3055                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3056                                 NUM_BANKS(ADDR_SURF_16_BANK));
3057
3058                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3060                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3061                                 NUM_BANKS(ADDR_SURF_16_BANK));
3062
3063                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3064                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3065                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3066                                 NUM_BANKS(ADDR_SURF_16_BANK));
3067
3068                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3070                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3071                                 NUM_BANKS(ADDR_SURF_16_BANK));
3072
3073                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3075                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3076                                 NUM_BANKS(ADDR_SURF_8_BANK));
3077
3078                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3079                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3080                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3081                                 NUM_BANKS(ADDR_SURF_4_BANK));
3082
3083                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3085                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3086                                 NUM_BANKS(ADDR_SURF_4_BANK));
3087
3088                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3089                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3090
3091                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3092                         if (reg_offset != 7)
3093                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3094
3095                 break;
3096         case CHIP_STONEY:
3097                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3098                                 PIPE_CONFIG(ADDR_SURF_P2) |
3099                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3100                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3102                                 PIPE_CONFIG(ADDR_SURF_P2) |
3103                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3104                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3105                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106                                 PIPE_CONFIG(ADDR_SURF_P2) |
3107                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3108                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3109                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3110                                 PIPE_CONFIG(ADDR_SURF_P2) |
3111                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3112                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3114                                 PIPE_CONFIG(ADDR_SURF_P2) |
3115                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3116                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3118                                 PIPE_CONFIG(ADDR_SURF_P2) |
3119                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3120                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3121                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3122                                 PIPE_CONFIG(ADDR_SURF_P2) |
3123                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3124                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3125                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3126                                 PIPE_CONFIG(ADDR_SURF_P2));
3127                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3128                                 PIPE_CONFIG(ADDR_SURF_P2) |
3129                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3130                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3131                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3132                                  PIPE_CONFIG(ADDR_SURF_P2) |
3133                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3134                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3135                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3136                                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3138                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3139                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3140                                  PIPE_CONFIG(ADDR_SURF_P2) |
3141                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3142                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3144                                  PIPE_CONFIG(ADDR_SURF_P2) |
3145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3147                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3148                                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3151                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3152                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3155                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3156                                  PIPE_CONFIG(ADDR_SURF_P2) |
3157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3160                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3164                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3167                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3168                                  PIPE_CONFIG(ADDR_SURF_P2) |
3169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3172                                  PIPE_CONFIG(ADDR_SURF_P2) |
3173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3176                                  PIPE_CONFIG(ADDR_SURF_P2) |
3177                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3178                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3180                                  PIPE_CONFIG(ADDR_SURF_P2) |
3181                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3182                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3183                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3184                                  PIPE_CONFIG(ADDR_SURF_P2) |
3185                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3186                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3187                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3188                                  PIPE_CONFIG(ADDR_SURF_P2) |
3189                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3190                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3191                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3192                                  PIPE_CONFIG(ADDR_SURF_P2) |
3193                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3194                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3195                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3196                                  PIPE_CONFIG(ADDR_SURF_P2) |
3197                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3198                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3199
3200                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3202                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3203                                 NUM_BANKS(ADDR_SURF_8_BANK));
3204                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3205                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3206                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3207                                 NUM_BANKS(ADDR_SURF_8_BANK));
3208                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3210                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3211                                 NUM_BANKS(ADDR_SURF_8_BANK));
3212                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3214                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3215                                 NUM_BANKS(ADDR_SURF_8_BANK));
3216                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3218                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3219                                 NUM_BANKS(ADDR_SURF_8_BANK));
3220                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3222                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3223                                 NUM_BANKS(ADDR_SURF_8_BANK));
3224                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3225                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3226                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3227                                 NUM_BANKS(ADDR_SURF_8_BANK));
3228                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3229                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3230                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3231                                 NUM_BANKS(ADDR_SURF_16_BANK));
3232                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3233                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3234                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235                                 NUM_BANKS(ADDR_SURF_16_BANK));
3236                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3237                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3238                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3239                                  NUM_BANKS(ADDR_SURF_16_BANK));
3240                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3241                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3242                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243                                  NUM_BANKS(ADDR_SURF_16_BANK));
3244                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3246                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247                                  NUM_BANKS(ADDR_SURF_16_BANK));
3248                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3249                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3250                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3251                                  NUM_BANKS(ADDR_SURF_16_BANK));
3252                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3253                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3254                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3255                                  NUM_BANKS(ADDR_SURF_8_BANK));
3256
3257                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3258                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3259                             reg_offset != 23)
3260                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3261
3262                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3263                         if (reg_offset != 7)
3264                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3265
3266                 break;
3267         default:
3268                 dev_warn(adev->dev,
3269                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3270                          adev->asic_type);
3271                 /* fall through */
3272
3273         case CHIP_CARRIZO:
3274                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3275                                 PIPE_CONFIG(ADDR_SURF_P2) |
3276                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3277                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3278                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3279                                 PIPE_CONFIG(ADDR_SURF_P2) |
3280                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3281                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3282                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3283                                 PIPE_CONFIG(ADDR_SURF_P2) |
3284                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3285                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3286                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3287                                 PIPE_CONFIG(ADDR_SURF_P2) |
3288                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3289                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3290                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3291                                 PIPE_CONFIG(ADDR_SURF_P2) |
3292                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3293                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3294                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3295                                 PIPE_CONFIG(ADDR_SURF_P2) |
3296                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3297                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3298                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3299                                 PIPE_CONFIG(ADDR_SURF_P2) |
3300                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3301                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3302                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3303                                 PIPE_CONFIG(ADDR_SURF_P2));
3304                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3305                                 PIPE_CONFIG(ADDR_SURF_P2) |
3306                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3307                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3308                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3309                                  PIPE_CONFIG(ADDR_SURF_P2) |
3310                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3311                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3312                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3313                                  PIPE_CONFIG(ADDR_SURF_P2) |
3314                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3315                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3316                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3317                                  PIPE_CONFIG(ADDR_SURF_P2) |
3318                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3319                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3320                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3321                                  PIPE_CONFIG(ADDR_SURF_P2) |
3322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3324                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3325                                  PIPE_CONFIG(ADDR_SURF_P2) |
3326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3328                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3329                                  PIPE_CONFIG(ADDR_SURF_P2) |
3330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3332                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3333                                  PIPE_CONFIG(ADDR_SURF_P2) |
3334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3336                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3337                                  PIPE_CONFIG(ADDR_SURF_P2) |
3338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3341                                  PIPE_CONFIG(ADDR_SURF_P2) |
3342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3344                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3345                                  PIPE_CONFIG(ADDR_SURF_P2) |
3346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3348                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3349                                  PIPE_CONFIG(ADDR_SURF_P2) |
3350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3352                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3353                                  PIPE_CONFIG(ADDR_SURF_P2) |
3354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3356                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3357                                  PIPE_CONFIG(ADDR_SURF_P2) |
3358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3360                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3361                                  PIPE_CONFIG(ADDR_SURF_P2) |
3362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3364                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3365                                  PIPE_CONFIG(ADDR_SURF_P2) |
3366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3368                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3369                                  PIPE_CONFIG(ADDR_SURF_P2) |
3370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3372                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3373                                  PIPE_CONFIG(ADDR_SURF_P2) |
3374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3376
3377                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3378                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3379                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3380                                 NUM_BANKS(ADDR_SURF_8_BANK));
3381                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3382                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3383                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384                                 NUM_BANKS(ADDR_SURF_8_BANK));
3385                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3386                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3387                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3388                                 NUM_BANKS(ADDR_SURF_8_BANK));
3389                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3391                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3392                                 NUM_BANKS(ADDR_SURF_8_BANK));
3393                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3396                                 NUM_BANKS(ADDR_SURF_8_BANK));
3397                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3399                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3400                                 NUM_BANKS(ADDR_SURF_8_BANK));
3401                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3402                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3403                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3404                                 NUM_BANKS(ADDR_SURF_8_BANK));
3405                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3406                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3407                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3408                                 NUM_BANKS(ADDR_SURF_16_BANK));
3409                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3410                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3411                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3412                                 NUM_BANKS(ADDR_SURF_16_BANK));
3413                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3414                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3415                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3416                                  NUM_BANKS(ADDR_SURF_16_BANK));
3417                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3418                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3419                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3420                                  NUM_BANKS(ADDR_SURF_16_BANK));
3421                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3422                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3423                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3424                                  NUM_BANKS(ADDR_SURF_16_BANK));
3425                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3426                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3427                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3428                                  NUM_BANKS(ADDR_SURF_16_BANK));
3429                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3430                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3431                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3432                                  NUM_BANKS(ADDR_SURF_8_BANK));
3433
3434                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3435                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3436                             reg_offset != 23)
3437                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3438
3439                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3440                         if (reg_offset != 7)
3441                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3442
3443                 break;
3444         }
3445 }
3446
3447 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3448                                   u32 se_num, u32 sh_num, u32 instance)
3449 {
3450         u32 data;
3451
3452         if (instance == 0xffffffff)
3453                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3454         else
3455                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3456
3457         if (se_num == 0xffffffff)
3458                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3459         else
3460                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3461
3462         if (sh_num == 0xffffffff)
3463                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3464         else
3465                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3466
3467         WREG32(mmGRBM_GFX_INDEX, data);
3468 }
3469
/* Route register accesses to the given ME/pipe/queue/VM via SRBM select. */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q, u32 vm)
{
	vi_srbm_select(adev, me, pipe, q, vm);
}
3475
3476 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3477 {
3478         u32 data, mask;
3479
3480         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3481                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3482
3483         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3484
3485         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3486                                          adev->gfx.config.max_sh_per_se);
3487
3488         return (~data) & mask;
3489 }
3490
3491 static void
3492 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3493 {
3494         switch (adev->asic_type) {
3495         case CHIP_FIJI:
3496         case CHIP_VEGAM:
3497                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3498                           RB_XSEL2(1) | PKR_MAP(2) |
3499                           PKR_XSEL(1) | PKR_YSEL(1) |
3500                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3501                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3502                            SE_PAIR_YSEL(2);
3503                 break;
3504         case CHIP_TONGA:
3505         case CHIP_POLARIS10:
3506                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3507                           SE_XSEL(1) | SE_YSEL(1);
3508                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3509                            SE_PAIR_YSEL(2);
3510                 break;
3511         case CHIP_TOPAZ:
3512         case CHIP_CARRIZO:
3513                 *rconf |= RB_MAP_PKR0(2);
3514                 *rconf1 |= 0x0;
3515                 break;
3516         case CHIP_POLARIS11:
3517         case CHIP_POLARIS12:
3518                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3519                           SE_XSEL(1) | SE_YSEL(1);
3520                 *rconf1 |= 0x0;
3521                 break;
3522         case CHIP_STONEY:
3523                 *rconf |= 0x0;
3524                 *rconf1 |= 0x0;
3525                 break;
3526         default:
3527                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3528                 break;
3529         }
3530 }
3531
/*
 * Program per-SE raster configs for harvested parts.  When rb_mask has
 * holes (some RBs fused off), the default SE/PKR/RB mapping fields are
 * rewritten per shader engine so that only populated backends are
 * referenced.  Writes PA_SC_RASTER_CONFIG(_1) for each SE, then restores
 * GRBM_GFX_INDEX to broadcast mode.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice the global RB mask into one sub-mask per shader engine. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* With 4 SEs, pick SE_PAIR_MAP based on which SE pair is populated. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx selects the SE pair this SE belongs to (0 or 2). */
		int idx = (se / 2) * 2;

		/* If one SE of the pair is empty, adjust SE_MAP accordingly. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		/* Same adjustment for the packer map within this SE. */
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			/* Remap PKR0's RB pair if one of its RBs is harvested. */
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				/* Likewise for PKR1's RB pair. */
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3640
/*
 * Discover the active render backends, program the raster configuration
 * (using the harvested path when some RBs are fused off), and cache the
 * per-SE/SH RB registers for userspace queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Gather the per-SH active-RB bitmaps into one global bitmap. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* Default config unless some RBs are harvested (num_rbs < pipes). */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3697
/**
 * gfx_v8_0_init_compute_vmid - initialize SH_MEM registers for compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the SH_MEM aperture configuration for the compute VMID range and
 * clear their GDS/GWS/OA allocations.
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	/* Apply the same aperture setup to every compute VMID. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
	}
}
3751
/* Clear the GDS/GWS/OA allocations of every non-zero VMID. */
static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
{
	int vmid;

	/*
	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
	 * the driver can enable them for graphics. VMID0 should maintain
	 * access so that HWS firmware can save/restore entries.
	 */
	for (vmid = 1; vmid < 16; vmid++) {
		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
		WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
	}
}
3769
3770 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3771 {
3772         switch (adev->asic_type) {
3773         default:
3774                 adev->gfx.config.double_offchip_lds_buf = 1;
3775                 break;
3776         case CHIP_CARRIZO:
3777         case CHIP_STONEY:
3778                 adev->gfx.config.double_offchip_lds_buf = 0;
3779                 break;
3780         }
3781 }
3782
/*
 * One-time programming of global gfx constants: address config registers,
 * tiling tables, RB setup, per-VMID SH_MEM apertures, the PA_SC FIFO
 * sizes and the SPI arbiter priorities.
 */
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	/* Program SH_MEM for every VMID the VM manager can hand out. */
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: MTYPE_UC default, zero SH_MEM_BASES. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* Other VMIDs: MTYPE_NC default and the shared
			 * aperture's upper bits in SH_MEM_BASES. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);
	gfx_v8_0_init_gds_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3865
/*
 * Poll until the RLC serdes CU masters report idle on every SE/SH, then
 * wait for the non-CU masters (SE/GC/TC0/TC1).  Logs via DRM_INFO and
 * returns early if any SE/SH fails to go idle within adev->usec_timeout
 * microseconds.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* Restore broadcast mode before bailing out. */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3903
3904 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3905                                                bool enable)
3906 {
3907         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3908
3909         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3910         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3911         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3912         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3913
3914         WREG32(mmCP_INT_CNTL_RING0, tmp);
3915 }
3916
3917 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3918 {
3919         /* csib */
3920         WREG32(mmRLC_CSIB_ADDR_HI,
3921                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3922         WREG32(mmRLC_CSIB_ADDR_LO,
3923                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3924         WREG32(mmRLC_CSIB_LENGTH,
3925                         adev->gfx.rlc.clear_state_size);
3926 }
3927
/*
 * Scan the RLC indirect register list starting at ind_offset and build
 * two side tables:
 *  - ind_start_offsets[]: the offset where each list entry starts,
 *  - unique_indices[]: the distinct index values referenced.
 * Within an entry, the word two positions past the current one holds an
 * index value; it is rewritten in place with its slot in
 * unique_indices[].  An 0xFFFFFFFF word terminates the current entry.
 * BUG()s if either side table would overflow.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			/* Record where this entry starts. */
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			/* Entry terminator: the next word starts a new entry. */
			new_entry = true;
			continue;
		}

		/* Skip ahead to this triplet's index word. */
		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* First occurrence: append to the unique table. */
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* Replace the raw index value with its table slot. */
		register_list_format[ind_offset] = indices;
	}
}
3977
/*
 * Upload the RLC save/restore lists: the direct register restore list
 * into ARAM, the indirect (format) list plus its size and starting
 * offsets into GPM scratch, and the unique index registers into the SRM
 * index control registers.  Returns 0 on success, -ENOMEM if the format
 * list cannot be duplicated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* Work on a copy: gfx_v8_0_parse_ind_reg_list rewrites it in place. */
	unsigned int *register_list_format =
		kmemdup(adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* restore list size: the dword count halved. */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			/* Low 18 bits go to the ADDR reg, bits above 20 to DATA. */
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4040
/* Turn on the RLC save/restore machine (SRM) so the save/restore list
 * programmed by gfx_v8_0_init_save_restore_list() takes effect. */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4045
4046 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4047 {
4048         uint32_t data;
4049
4050         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4051
4052         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4053         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4054         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4055         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4056         WREG32(mmRLC_PG_DELAY, data);
4057
4058         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4059         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4060
4061 }
4062
/* Toggle the SMU_CLK_SLOWDOWN_ON_PU_ENABLE bit of RLC_PG_CNTL. */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4068
/* Toggle the SMU_CLK_SLOWDOWN_ON_PD_ENABLE bit of RLC_PG_CNTL. */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4074
/* Enable/disable CP power gating.  Note the hardware field is a
 * *disable* bit, so the value written is the inverse of @enable. */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4079
4080 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4081 {
4082         if ((adev->asic_type == CHIP_CARRIZO) ||
4083             (adev->asic_type == CHIP_STONEY)) {
4084                 gfx_v8_0_init_csb(adev);
4085                 gfx_v8_0_init_save_restore_list(adev);
4086                 gfx_v8_0_enable_save_restore_machine(adev);
4087                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4088                 gfx_v8_0_init_power_gating(adev);
4089                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4090         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4091                    (adev->asic_type == CHIP_POLARIS12) ||
4092                    (adev->asic_type == CHIP_VEGAM)) {
4093                 gfx_v8_0_init_csb(adev);
4094                 gfx_v8_0_init_save_restore_list(adev);
4095                 gfx_v8_0_enable_save_restore_machine(adev);
4096                 gfx_v8_0_init_power_gating(adev);
4097         }
4098
4099 }
4100
/* Halt the RLC F32 core, mask GUI idle interrupts and wait for the
 * RLC serdes to go idle. */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4108
/* Pulse the RLC soft-reset bit in GRBM_SOFT_RESET, allowing 50us of
 * settle time after both assert and de-assert. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4117
/* Enable the RLC F32 core and, on dGPUs only, re-enable GUI idle
 * interrupts (APUs enable them later, after the CP is initialized). */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4128
/*
 * Restart the RLC: stop, soft-reset, redo power-gating init, start.
 * Under SR-IOV only the clear state buffer is re-initialized —
 * NOTE(review): presumably the hypervisor owns the RLC in that case;
 * confirm against the virtualization design.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev)) {
		gfx_v8_0_init_csb(adev);
		return 0;
	}

	adev->gfx.rlc.funcs->stop(adev);
	adev->gfx.rlc.funcs->reset(adev);
	gfx_v8_0_init_pg(adev);
	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
4143
4144 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4145 {
4146         int i;
4147         u32 tmp = RREG32(mmCP_ME_CNTL);
4148
4149         if (enable) {
4150                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4151                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4152                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4153         } else {
4154                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4155                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4156                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4157                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4158                         adev->gfx.gfx_ring[i].sched.ready = false;
4159         }
4160         WREG32(mmCP_ME_CNTL, tmp);
4161         udelay(50);
4162 }
4163
4164 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4165 {
4166         u32 count = 0;
4167         const struct cs_section_def *sect = NULL;
4168         const struct cs_extent_def *ext = NULL;
4169
4170         /* begin clear state */
4171         count += 2;
4172         /* context control state */
4173         count += 3;
4174
4175         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4176                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4177                         if (sect->id == SECT_CONTEXT)
4178                                 count += 2 + ext->reg_count;
4179                         else
4180                                 return 0;
4181                 }
4182         }
4183         /* pa_sc_raster_config/pa_sc_raster_config1 */
4184         count += 4;
4185         /* end clear state */
4186         count += 2;
4187         /* clear state */
4188         count += 2;
4189
4190         return count;
4191 }
4192
/*
 * Initialize the gfx CP and emit the initial state stream on the gfx
 * ring: clear-state preamble, context control, the context registers
 * from vi_cs_data, the raster config pair, a CLEAR_STATE packet and
 * the CE partition bases.  The packet order and dword counts must
 * match gfx_v8_0_get_csb_size().
 *
 * Returns a negative error code if ring space cannot be allocated.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* CSB contents plus 4 dwords for the SET_BASE packet below */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* raster config from the first RB's harvest configuration */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4256 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4257 {
4258         u32 tmp;
4259         /* no gfx doorbells on iceland */
4260         if (adev->asic_type == CHIP_TOPAZ)
4261                 return;
4262
4263         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4264
4265         if (ring->use_doorbell) {
4266                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4267                                 DOORBELL_OFFSET, ring->doorbell_index);
4268                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4269                                                 DOORBELL_HIT, 0);
4270                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4271                                             DOORBELL_EN, 1);
4272         } else {
4273                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4274         }
4275
4276         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4277
4278         if (adev->flags & AMD_IS_APU)
4279                 return;
4280
4281         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4282                                         DOORBELL_RANGE_LOWER,
4283                                         adev->doorbell_index.gfx_ring0);
4284         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4285
4286         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4287                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4288 }
4289
/*
 * Program the gfx ring buffer registers (size, pointers, rptr/wptr
 * write-back addresses, base address and doorbell), then emit the
 * initial state via gfx_v8_0_cp_gfx_start() and mark the ring ready.
 * The register write order follows the hardware bring-up sequence and
 * must not be rearranged.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* re-write CNTL without RB_RPTR_WR_ENA to latch the pointers */
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->sched.ready = true;

	return 0;
}
4343
4344 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4345 {
4346         int i;
4347
4348         if (enable) {
4349                 WREG32(mmCP_MEC_CNTL, 0);
4350         } else {
4351                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4352                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4353                         adev->gfx.compute_ring[i].sched.ready = false;
4354                 adev->gfx.kiq.ring.sched.ready = false;
4355         }
4356         udelay(50);
4357 }
4358
4359 /* KIQ functions */
/* Tell the RLC which me/pipe/queue is the KIQ by encoding the triple
 * into the low byte of RLC_CP_SCHEDULERS. */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	/* second write with bit 7 set — NOTE(review): presumably an
	 * "update/valid" strobe that latches the new queue id; confirm
	 * against RLC documentation */
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4373
/*
 * Use the KIQ to publish and map all enabled compute queues (KCQs):
 * one SET_RESOURCES packet carrying the queue mask, then a MAP_QUEUES
 * packet per compute ring binding its MQD and wptr write-back address.
 * Packet layout (8 dwords per MAP_QUEUES, 8 for SET_RESOURCES) must
 * match the ring allocation below.
 *
 * Returns a negative error code if KIQ ring space cannot be allocated.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	/* build the bitmask of MEC queues carved out for the KCQs */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}

	amdgpu_ring_commit(kiq_ring);

	return 0;
}
4434
4435 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4436 {
4437         int i, r = 0;
4438
4439         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4440                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4441                 for (i = 0; i < adev->usec_timeout; i++) {
4442                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4443                                 break;
4444                         udelay(1);
4445                 }
4446                 if (i == adev->usec_timeout)
4447                         r = -ETIMEDOUT;
4448         }
4449         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4450         WREG32(mmCP_HQD_PQ_RPTR, 0);
4451         WREG32(mmCP_HQD_PQ_WPTR, 0);
4452
4453         return r;
4454 }
4455
/*
 * Build the memory queue descriptor (MQD) image for @ring in
 * ring->mqd_ptr.  Field values mirror the CP_HQD_* register layout;
 * several fields start from the current register contents of the
 * SRBM-selected queue, so the caller must have selected the target
 * me/pipe/queue via vi_srbm_select() first (see the callers).
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* dynamic CU mask lives in the same allocation, right after the MQD */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot the remaining fields from current hw state */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4598
/*
 * Write the MQD image into the HQD registers of the currently
 * SRBM-selected queue and activate it.  The caller must hold the
 * SRBM selection (vi_srbm_select) for the target me/pipe/queue —
 * see gfx_v8_0_kiq_init_queue().  Registers are programmed in three
 * ranges so the EOP pointers can be skipped on Tonga (errata below)
 * and CP_HQD_ACTIVE is written last.  Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4635
4636 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4637 {
4638         struct amdgpu_device *adev = ring->adev;
4639         struct vi_mqd *mqd = ring->mqd_ptr;
4640         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4641
4642         gfx_v8_0_kiq_setting(ring);
4643
4644         if (adev->in_gpu_reset) { /* for GPU_RESET case */
4645                 /* reset MQD to a clean status */
4646                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4647                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4648
4649                 /* reset ring buffer */
4650                 ring->wptr = 0;
4651                 amdgpu_ring_clear_ring(ring);
4652                 mutex_lock(&adev->srbm_mutex);
4653                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4654                 gfx_v8_0_mqd_commit(adev, mqd);
4655                 vi_srbm_select(adev, 0, 0, 0, 0);
4656                 mutex_unlock(&adev->srbm_mutex);
4657         } else {
4658                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4659                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4660                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4661                 mutex_lock(&adev->srbm_mutex);
4662                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4663                 gfx_v8_0_mqd_init(ring);
4664                 gfx_v8_0_mqd_commit(adev, mqd);
4665                 vi_srbm_select(adev, 0, 0, 0, 0);
4666                 mutex_unlock(&adev->srbm_mutex);
4667
4668                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4669                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4670         }
4671
4672         return 0;
4673 }
4674
4675 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4676 {
4677         struct amdgpu_device *adev = ring->adev;
4678         struct vi_mqd *mqd = ring->mqd_ptr;
4679         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4680
4681         if (!adev->in_gpu_reset && !adev->in_suspend) {
4682                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4683                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4684                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4685                 mutex_lock(&adev->srbm_mutex);
4686                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4687                 gfx_v8_0_mqd_init(ring);
4688                 vi_srbm_select(adev, 0, 0, 0, 0);
4689                 mutex_unlock(&adev->srbm_mutex);
4690
4691                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4692                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4693         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4694                 /* reset MQD to a clean status */
4695                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4696                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4697                 /* reset ring buffer */
4698                 ring->wptr = 0;
4699                 amdgpu_ring_clear_ring(ring);
4700         } else {
4701                 amdgpu_ring_clear_ring(ring);
4702         }
4703         return 0;
4704 }
4705
/* Program the MEC doorbell aperture (KIQ .. mec_ring7) on ASICs newer
 * than Tonga, then globally enable CP doorbells. */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4715
4716 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4717 {
4718         struct amdgpu_ring *ring;
4719         int r;
4720
4721         ring = &adev->gfx.kiq.ring;
4722
4723         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4724         if (unlikely(r != 0))
4725                 return r;
4726
4727         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4728         if (unlikely(r != 0))
4729                 return r;
4730
4731         gfx_v8_0_kiq_init_queue(ring);
4732         amdgpu_bo_kunmap(ring->mqd_obj);
4733         ring->mqd_ptr = NULL;
4734         amdgpu_bo_unreserve(ring->mqd_obj);
4735         ring->sched.ready = true;
4736         return 0;
4737 }
4738
4739 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4740 {
4741         struct amdgpu_ring *ring = NULL;
4742         int r = 0, i;
4743
4744         gfx_v8_0_cp_compute_enable(adev, true);
4745
4746         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4747                 ring = &adev->gfx.compute_ring[i];
4748
4749                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4750                 if (unlikely(r != 0))
4751                         goto done;
4752                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4753                 if (!r) {
4754                         r = gfx_v8_0_kcq_init_queue(ring);
4755                         amdgpu_bo_kunmap(ring->mqd_obj);
4756                         ring->mqd_ptr = NULL;
4757                 }
4758                 amdgpu_bo_unreserve(ring->mqd_obj);
4759                 if (r)
4760                         goto done;
4761         }
4762
4763         gfx_v8_0_set_mec_doorbell_range(adev);
4764
4765         r = gfx_v8_0_kiq_kcq_enable(adev);
4766         if (r)
4767                 goto done;
4768
4769 done:
4770         return r;
4771 }
4772
4773 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4774 {
4775         int r, i;
4776         struct amdgpu_ring *ring;
4777
4778         /* collect all the ring_tests here, gfx, kiq, compute */
4779         ring = &adev->gfx.gfx_ring[0];
4780         r = amdgpu_ring_test_helper(ring);
4781         if (r)
4782                 return r;
4783
4784         ring = &adev->gfx.kiq.ring;
4785         r = amdgpu_ring_test_helper(ring);
4786         if (r)
4787                 return r;
4788
4789         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4790                 ring = &adev->gfx.compute_ring[i];
4791                 amdgpu_ring_test_helper(ring);
4792         }
4793
4794         return 0;
4795 }
4796
4797 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4798 {
4799         int r;
4800
4801         if (!(adev->flags & AMD_IS_APU))
4802                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4803
4804         r = gfx_v8_0_kiq_resume(adev);
4805         if (r)
4806                 return r;
4807
4808         r = gfx_v8_0_cp_gfx_resume(adev);
4809         if (r)
4810                 return r;
4811
4812         r = gfx_v8_0_kcq_resume(adev);
4813         if (r)
4814                 return r;
4815
4816         r = gfx_v8_0_cp_test_all_rings(adev);
4817         if (r)
4818                 return r;
4819
4820         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4821
4822         return 0;
4823 }
4824
/* Enable or disable both the gfx and compute command processors. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4830
4831 static int gfx_v8_0_hw_init(void *handle)
4832 {
4833         int r;
4834         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4835
4836         gfx_v8_0_init_golden_registers(adev);
4837         gfx_v8_0_constants_init(adev);
4838
4839         r = gfx_v8_0_csb_vram_pin(adev);
4840         if (r)
4841                 return r;
4842
4843         r = adev->gfx.rlc.funcs->resume(adev);
4844         if (r)
4845                 return r;
4846
4847         r = gfx_v8_0_cp_resume(adev);
4848
4849         return r;
4850 }
4851
4852 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4853 {
4854         int r, i;
4855         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4856
4857         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4858         if (r)
4859                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4860
4861         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4862                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4863
4864                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4865                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4866                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4867                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4868                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4869                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4870                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4871                 amdgpu_ring_write(kiq_ring, 0);
4872                 amdgpu_ring_write(kiq_ring, 0);
4873                 amdgpu_ring_write(kiq_ring, 0);
4874         }
4875         r = amdgpu_ring_test_helper(kiq_ring);
4876         if (r)
4877                 DRM_ERROR("KCQ disable failed\n");
4878
4879         return r;
4880 }
4881
4882 static bool gfx_v8_0_is_idle(void *handle)
4883 {
4884         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4885
4886         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4887                 || RREG32(mmGRBM_STATUS2) != 0x8)
4888                 return false;
4889         else
4890                 return true;
4891 }
4892
4893 static bool gfx_v8_0_rlc_is_idle(void *handle)
4894 {
4895         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4896
4897         if (RREG32(mmGRBM_STATUS2) != 0x8)
4898                 return false;
4899         else
4900                 return true;
4901 }
4902
4903 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4904 {
4905         unsigned int i;
4906         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4907
4908         for (i = 0; i < adev->usec_timeout; i++) {
4909                 if (gfx_v8_0_rlc_is_idle(handle))
4910                         return 0;
4911
4912                 udelay(1);
4913         }
4914         return -ETIMEDOUT;
4915 }
4916
4917 static int gfx_v8_0_wait_for_idle(void *handle)
4918 {
4919         unsigned int i;
4920         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4921
4922         for (i = 0; i < adev->usec_timeout; i++) {
4923                 if (gfx_v8_0_is_idle(handle))
4924                         return 0;
4925
4926                 udelay(1);
4927         }
4928         return -ETIMEDOUT;
4929 }
4930
/*
 * IP-block .hw_fini callback: release gfx interrupt references, unmap
 * the compute queues, then halt the CP and RLC once they are idle.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* Drop the interrupt references taken in late_init. */
	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	gfx_v8_0_kcq_disable(adev);

	/* Under SR-IOV the host owns CP/RLC teardown; stop here. */
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	/* Halt CP and RLC only once each reports idle; otherwise warn and
	 * skip rather than risk hanging the teardown. */
	amdgpu_gfx_rlc_enter_safe_mode(adev);
	if (!gfx_v8_0_wait_for_idle(adev))
		gfx_v8_0_cp_enable(adev, false);
	else
		pr_err("cp is busy, skip halt cp\n");
	if (!gfx_v8_0_wait_for_rlc_idle(adev))
		adev->gfx.rlc.funcs->stop(adev);
	else
		pr_err("rlc is busy, skip halt rlc\n");
	amdgpu_gfx_rlc_exit_safe_mode(adev);

	gfx_v8_0_csb_vram_unpin(adev);

	return 0;
}
4964
/* IP-block .suspend callback: suspend is a full hw teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4969
/* IP-block .resume callback: resume is a full hw re-init. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4974
4975 static bool gfx_v8_0_check_soft_reset(void *handle)
4976 {
4977         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4978         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4979         u32 tmp;
4980
4981         /* GRBM_STATUS */
4982         tmp = RREG32(mmGRBM_STATUS);
4983         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4984                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4985                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4986                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4987                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4988                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4989                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4990                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4991                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4992                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4993                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4994                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4995                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4996         }
4997
4998         /* GRBM_STATUS2 */
4999         tmp = RREG32(mmGRBM_STATUS2);
5000         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5001                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5002                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5003
5004         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5005             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5006             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5007                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5008                                                 SOFT_RESET_CPF, 1);
5009                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5010                                                 SOFT_RESET_CPC, 1);
5011                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5012                                                 SOFT_RESET_CPG, 1);
5013                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5014                                                 SOFT_RESET_GRBM, 1);
5015         }
5016
5017         /* SRBM_STATUS */
5018         tmp = RREG32(mmSRBM_STATUS);
5019         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5020                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5021                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5022         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5023                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5024                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5025
5026         if (grbm_soft_reset || srbm_soft_reset) {
5027                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5028                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5029                 return true;
5030         } else {
5031                 adev->gfx.grbm_soft_reset = 0;
5032                 adev->gfx.srbm_soft_reset = 0;
5033                 return false;
5034         }
5035 }
5036
5037 static int gfx_v8_0_pre_soft_reset(void *handle)
5038 {
5039         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5040         u32 grbm_soft_reset = 0;
5041
5042         if ((!adev->gfx.grbm_soft_reset) &&
5043             (!adev->gfx.srbm_soft_reset))
5044                 return 0;
5045
5046         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5047
5048         /* stop the rlc */
5049         adev->gfx.rlc.funcs->stop(adev);
5050
5051         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5052             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5053                 /* Disable GFX parsing/prefetching */
5054                 gfx_v8_0_cp_gfx_enable(adev, false);
5055
5056         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5057             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5058             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5059             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5060                 int i;
5061
5062                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5063                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5064
5065                         mutex_lock(&adev->srbm_mutex);
5066                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5067                         gfx_v8_0_deactivate_hqd(adev, 2);
5068                         vi_srbm_select(adev, 0, 0, 0, 0);
5069                         mutex_unlock(&adev->srbm_mutex);
5070                 }
5071                 /* Disable MEC parsing/prefetching */
5072                 gfx_v8_0_cp_compute_enable(adev, false);
5073         }
5074
5075        return 0;
5076 }
5077
/*
 * Perform the actual soft reset using the masks computed by
 * gfx_v8_0_check_soft_reset(): stall the memory controller, pulse the
 * GRBM/SRBM soft-reset bits, then release the stall.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* Stall the gfx-to-memory-controller path while resetting. */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* Assert the reset bits, hold them for 50us, then deassert.
		 * The read-backs after each write flush/confirm the write. */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* Same assert/hold/deassert sequence for the SRBM bits. */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* Release the memory-controller stall. */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5139
/*
 * Bring the hardware back up after a soft reset: re-deactivate the
 * compute HQDs and resume KIQ/KCQ and the gfx ring as dictated by the
 * cached reset mask, re-test all rings, and restart the RLC.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* Make sure every compute HQD is inactive before the queues
		 * are re-initialized by the resume paths below. */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* KIQ must come back before the compute queues it manages. */
		gfx_v8_0_kiq_resume(adev);
		gfx_v8_0_kcq_resume(adev);
	}

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	gfx_v8_0_cp_test_all_rings(adev);

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
5180
5181 /**
5182  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5183  *
5184  * @adev: amdgpu_device pointer
5185  *
5186  * Fetches a GPU clock counter snapshot.
5187  * Returns the 64 bit clock counter snapshot.
5188  */
5189 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5190 {
5191         uint64_t clock;
5192
5193         mutex_lock(&adev->gfx.gpu_clock_mutex);
5194         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5195         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5196                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5197         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5198         return clock;
5199 }
5200
/*
 * Emit WRITE_DATA packets that program the per-VMID GDS base/size,
 * GWS and OA allocations for @vmid on @ring.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base packed into one register value. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at oa_base. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5239
5240 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5241 {
5242         WREG32(mmSQ_IND_INDEX,
5243                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5244                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5245                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5246                 (SQ_IND_INDEX__FORCE_READ_MASK));
5247         return RREG32(mmSQ_IND_DATA);
5248 }
5249
5250 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5251                            uint32_t wave, uint32_t thread,
5252                            uint32_t regno, uint32_t num, uint32_t *out)
5253 {
5254         WREG32(mmSQ_IND_INDEX,
5255                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5256                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5257                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5258                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5259                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5260                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5261         while (num--)
5262                 *(out++) = RREG32(mmSQ_IND_DATA);
5263 }
5264
5265 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5266 {
5267         /* type 0 wave data */
5268         dst[(*no_fields)++] = 0;
5269         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5270         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5271         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5272         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5273         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5274         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5275         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5276         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5277         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5278         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5279         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5280         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5281         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5282         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5283         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5284         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5285         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5286         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5287 }
5288
5289 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5290                                      uint32_t wave, uint32_t start,
5291                                      uint32_t size, uint32_t *dst)
5292 {
5293         wave_read_regs(
5294                 adev, simd, wave, 0,
5295                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5296 }
5297
5298
/* GFX helper callbacks exposed to the rest of the driver (debugfs wave
 * dumps, clock counter reads, SE/SH and ME/pipe/queue selection). */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5306
5307 static int gfx_v8_0_early_init(void *handle)
5308 {
5309         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5310
5311         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5312         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5313         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5314         gfx_v8_0_set_ring_funcs(adev);
5315         gfx_v8_0_set_irq_funcs(adev);
5316         gfx_v8_0_set_gds_init(adev);
5317         gfx_v8_0_set_rlc_funcs(adev);
5318
5319         return 0;
5320 }
5321
5322 static int gfx_v8_0_late_init(void *handle)
5323 {
5324         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5325         int r;
5326
5327         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5328         if (r)
5329                 return r;
5330
5331         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5332         if (r)
5333                 return r;
5334
5335         /* requires IBs so do in late init after IB pool is initialized */
5336         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5337         if (r)
5338                 return r;
5339
5340         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5341         if (r) {
5342                 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5343                 return r;
5344         }
5345
5346         r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5347         if (r) {
5348                 DRM_ERROR(
5349                         "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5350                         r);
5351                 return r;
5352         }
5353
5354         return 0;
5355 }
5356
5357 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5358                                                        bool enable)
5359 {
5360         if (((adev->asic_type == CHIP_POLARIS11) ||
5361             (adev->asic_type == CHIP_POLARIS12) ||
5362             (adev->asic_type == CHIP_VEGAM)) &&
5363             adev->powerplay.pp_funcs->set_powergating_by_smu)
5364                 /* Send msg to SMU via Powerplay */
5365                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5366
5367         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5368 }
5369
/* Toggle dynamic per-CU medium-grain power gating. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5375
/* Toggle quick medium-grain power gating (Polaris11-class ASICs). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5381
/* Toggle coarse-grain gfx power gating (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5387
/* Toggle gfx pipeline power gating (Carrizo/Stoney). */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5397
5398 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5399                                           bool enable)
5400 {
5401         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5402                 cz_enable_gfx_cg_power_gating(adev, true);
5403                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5404                         cz_enable_gfx_pipeline_power_gating(adev, true);
5405         } else {
5406                 cz_enable_gfx_cg_power_gating(adev, false);
5407                 cz_enable_gfx_pipeline_power_gating(adev, false);
5408         }
5409 }
5410
/*
 * IP-block .set_powergating_state callback: apply the per-ASIC power
 * gating configuration implied by @state and adev->pg_flags.  All
 * register updates happen inside the RLC safe-mode bracket when any of
 * the affected PG features is supported.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	/* The host controls power gating under SR-IOV. */
	if (amdgpu_sriov_vf(adev))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down and CP power gating follow their support
		 * flags regardless of the requested gate state. */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		/* Static and dynamic MG PG require both support and gate. */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		/* Polaris-class parts support static, dynamic and quick
		 * MG power gating; each needs both support and gate. */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_exit_safe_mode(adev);
	return 0;
}
5481
/*
 * IP-block .get_clockgating_state callback: read back the clock-gating
 * registers and report the currently-active AMD_CG_SUPPORT_* features
 * by OR-ing them into *flags.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* NOTE(review): under SR-IOV the flags are cleared but the
	 * register reads below still execute — confirm this is intended. */
	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGLG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS (same register as CGCG above) */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS (same register as CGTS above) */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5523
/*
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM serdes command to all CU and
 * non-CU serdes masters on every SE/SH.
 *
 * @reg_addr: BPM register address placed in the REG_ADDR field
 * @cmd:      command value placed in the BPM_DATA field
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* 0xffffffff selects broadcast to all SEs/SHs/CUs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	/* Clear all command/select fields before composing the new command.
	 * NOTE(review): the Stoney variant does not clear BPM_DATA/REG_ADDR —
	 * presumably intentional for that ASIC, but worth confirming. */
	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* Compose command: 0xff BPM address = broadcast to all BPMs */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5564
/* Legacy RLC safe-mode mailbox definitions (RLC_GPR_REG2 request/message
 * fields).  NOTE(review): none of these are referenced by the code visible
 * in this file — candidates for removal if unused file-wide. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5571
5572 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5573 {
5574         uint32_t rlc_setting;
5575
5576         rlc_setting = RREG32(mmRLC_CNTL);
5577         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5578                 return false;
5579
5580         return true;
5581 }
5582
/*
 * gfx_v8_0_set_safe_mode - request RLC safe mode and wait for the handshake.
 *
 * Writes CMD with MESSAGE=1 (enter) to RLC_SAFE_MODE, then polls until GFX
 * clock and power status are both up, and finally until the RLC clears the
 * CMD bit to acknowledge the request.  Both polls time out silently after
 * adev->usec_timeout microseconds.
 */
static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;
	/* NOTE(review): the request is seeded from RLC_CNTL but written to
	 * RLC_SAFE_MODE — matches the existing driver pattern; confirm the
	 * preserved bits are meaningful in RLC_SAFE_MODE. */
	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
	WREG32(mmRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPM_STAT) &
		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
			break;
		udelay(1);
	}
	/* wait for the RLC to acknowledge by clearing CMD */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5609
/*
 * gfx_v8_0_unset_safe_mode - request RLC safe-mode exit (CMD with MESSAGE=0)
 * and poll until the RLC acknowledges by clearing the CMD bit, with a silent
 * timeout after adev->usec_timeout microseconds.
 */
static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	WREG32(mmRLC_SAFE_MODE, data);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5626
/* RLC callback table for gfx v8.  NOTE(review): despite the "iceland" name
 * these callbacks appear to serve all VI-family parts handled by this file —
 * confirm against the code that installs adev->gfx.rlc.funcs. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
	.set_safe_mode = gfx_v8_0_set_safe_mode,
	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
	.init = gfx_v8_0_rlc_init,
	.get_csb_size = gfx_v8_0_get_csb_size,
	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
	.resume = gfx_v8_0_rlc_resume,
	.stop = gfx_v8_0_rlc_stop,
	.reset = gfx_v8_0_rlc_reset,
	.start = gfx_v8_0_rlc_start
};
5640
/*
 * gfx_v8_0_update_medium_grain_clock_gating - enable or disable MGCG, MGLS
 * and CGTS (tree shade) clockgating.
 *
 * The whole sequence runs inside RLC safe mode.  The numbered steps mirror
 * the required hardware programming order: memory light sleep first, then the
 * MGCG override, serdes idle waits around each serdes command, and finally
 * the CGTS SM control register.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override set */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5744
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - enable or disable CGCG and
 * (optionally) CGLS.
 *
 * Runs inside RLC safe mode.  On enable, the CGCG/CGLS overrides are cleared
 * through the serdes BPM interface before RLC_CGCG_CGLS_CTRL is programmed;
 * on disable, GUI idle interrupts are masked first and the overrides are set
 * again before turning the enables off.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* also clear the CGLS override */
			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5837 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5838                                             bool enable)
5839 {
5840         if (enable) {
5841                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5842                  * ===  MGCG + MGLS + TS(CG/LS) ===
5843                  */
5844                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5845                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5846         } else {
5847                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5848                  * ===  CGCG + CGLS ===
5849                  */
5850                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5851                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5852         }
5853         return 0;
5854 }
5855
5856 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5857                                           enum amd_clockgating_state state)
5858 {
5859         uint32_t msg_id, pp_state = 0;
5860         uint32_t pp_support_state = 0;
5861
5862         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5863                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5864                         pp_support_state = PP_STATE_SUPPORT_LS;
5865                         pp_state = PP_STATE_LS;
5866                 }
5867                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5868                         pp_support_state |= PP_STATE_SUPPORT_CG;
5869                         pp_state |= PP_STATE_CG;
5870                 }
5871                 if (state == AMD_CG_STATE_UNGATE)
5872                         pp_state = 0;
5873
5874                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5875                                 PP_BLOCK_GFX_CG,
5876                                 pp_support_state,
5877                                 pp_state);
5878                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5879                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5880         }
5881
5882         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5883                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5884                         pp_support_state = PP_STATE_SUPPORT_LS;
5885                         pp_state = PP_STATE_LS;
5886                 }
5887
5888                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5889                         pp_support_state |= PP_STATE_SUPPORT_CG;
5890                         pp_state |= PP_STATE_CG;
5891                 }
5892
5893                 if (state == AMD_CG_STATE_UNGATE)
5894                         pp_state = 0;
5895
5896                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5897                                 PP_BLOCK_GFX_MG,
5898                                 pp_support_state,
5899                                 pp_state);
5900                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5901                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5902         }
5903
5904         return 0;
5905 }
5906
5907 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5908                                           enum amd_clockgating_state state)
5909 {
5910
5911         uint32_t msg_id, pp_state = 0;
5912         uint32_t pp_support_state = 0;
5913
5914         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5915                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5916                         pp_support_state = PP_STATE_SUPPORT_LS;
5917                         pp_state = PP_STATE_LS;
5918                 }
5919                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5920                         pp_support_state |= PP_STATE_SUPPORT_CG;
5921                         pp_state |= PP_STATE_CG;
5922                 }
5923                 if (state == AMD_CG_STATE_UNGATE)
5924                         pp_state = 0;
5925
5926                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5927                                 PP_BLOCK_GFX_CG,
5928                                 pp_support_state,
5929                                 pp_state);
5930                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5931                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5932         }
5933
5934         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5935                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5936                         pp_support_state = PP_STATE_SUPPORT_LS;
5937                         pp_state = PP_STATE_LS;
5938                 }
5939                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5940                         pp_support_state |= PP_STATE_SUPPORT_CG;
5941                         pp_state |= PP_STATE_CG;
5942                 }
5943                 if (state == AMD_CG_STATE_UNGATE)
5944                         pp_state = 0;
5945
5946                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5947                                 PP_BLOCK_GFX_3D,
5948                                 pp_support_state,
5949                                 pp_state);
5950                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5951                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5952         }
5953
5954         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5955                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5956                         pp_support_state = PP_STATE_SUPPORT_LS;
5957                         pp_state = PP_STATE_LS;
5958                 }
5959
5960                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5961                         pp_support_state |= PP_STATE_SUPPORT_CG;
5962                         pp_state |= PP_STATE_CG;
5963                 }
5964
5965                 if (state == AMD_CG_STATE_UNGATE)
5966                         pp_state = 0;
5967
5968                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5969                                 PP_BLOCK_GFX_MG,
5970                                 pp_support_state,
5971                                 pp_state);
5972                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5973                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5974         }
5975
5976         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5977                 pp_support_state = PP_STATE_SUPPORT_LS;
5978
5979                 if (state == AMD_CG_STATE_UNGATE)
5980                         pp_state = 0;
5981                 else
5982                         pp_state = PP_STATE_LS;
5983
5984                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5985                                 PP_BLOCK_GFX_RLC,
5986                                 pp_support_state,
5987                                 pp_state);
5988                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5989                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5990         }
5991
5992         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5993                 pp_support_state = PP_STATE_SUPPORT_LS;
5994
5995                 if (state == AMD_CG_STATE_UNGATE)
5996                         pp_state = 0;
5997                 else
5998                         pp_state = PP_STATE_LS;
5999                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6000                         PP_BLOCK_GFX_CP,
6001                         pp_support_state,
6002                         pp_state);
6003                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6004                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6005         }
6006
6007         return 0;
6008 }
6009
/*
 * gfx_v8_0_set_clockgating_state - IP-block clockgating entry point.
 *
 * Dispatches per ASIC: Fiji/Carrizo/Stoney program registers directly,
 * Tonga and the Polaris family go through SMU messages.  No-op for VFs,
 * which do not control clockgating.  Always returns 0.
 */
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}
6039
6040 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6041 {
6042         return ring->adev->wb.wb[ring->rptr_offs];
6043 }
6044
6045 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6046 {
6047         struct amdgpu_device *adev = ring->adev;
6048
6049         if (ring->use_doorbell)
6050                 /* XXX check if swapping is necessary on BE */
6051                 return ring->adev->wb.wb[ring->wptr_offs];
6052         else
6053                 return RREG32(mmCP_RB0_WPTR);
6054 }
6055
6056 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6057 {
6058         struct amdgpu_device *adev = ring->adev;
6059
6060         if (ring->use_doorbell) {
6061                 /* XXX check if swapping is necessary on BE */
6062                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6063                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6064         } else {
6065                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6066                 (void)RREG32(mmCP_RB0_WPTR);
6067         }
6068 }
6069
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit a WAIT_REG_MEM packet that triggers an
 * HDP flush and waits for its completion bit.
 *
 * Compute/KIQ rings pick the per-pipe CP flush-done bit for their MEC
 * (me 1 -> CP2..., me 2 -> CP6...; other values are unexpected and emit
 * nothing), GFX rings use CP0 and wait on the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6102
/*
 * gfx_v8_0_ring_emit_vgt_flush - emit a VS partial flush followed by a VGT
 * flush as two EVENT_WRITE packets.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6113
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the GFX ring.
 *
 * CE IBs use INDIRECT_BUFFER_CONST, DE IBs plain INDIRECT_BUFFER.  Under
 * SR-IOV, preemptible IBs are marked PRE_ENB and DE IBs additionally get
 * de-meta data emitted for mid-command-buffer preemption.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
					struct amdgpu_job *job,
					struct amdgpu_ib *ib,
					uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* swap enable for BE hosts */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6145
/*
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring,
 * optionally resetting the GDS max wave ID first (see comment below).
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	/* Currently, there is a high possibility to get wave ID mismatch
	 * between ME and GDS, leading to a hw deadlock, because ME generates
	 * different wave IDs than the GDS expects. This situation happens
	 * randomly when at least 5 compute pipes use GDS ordered append.
	 * The wave IDs generated by ME are also wrong after suspend/resume.
	 * Those are probably bugs somewhere else in the kernel driver.
	 *
	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
	 * GDS to 0 for this ring (me/pipe).
	 */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |	/* swap enable for BE hosts */
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6179
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit an EVENT_WRITE_EOP fence on the GFX
 * ring: flush TC/TCL1 caches, write the 32- or 64-bit seq value to @addr,
 * and optionally raise an interrupt (AMDGPU_FENCE_FLAG_INT).
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6200
/*
 * gfx_v8_0_ring_emit_pipeline_sync - emit a WAIT_REG_MEM that stalls the
 * ring until the fence memory at the ring's sync address reaches the latest
 * emitted seq.  GFX rings wait on the PFP engine, compute rings on ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6217
/*
 * gfx_v8_0_ring_emit_vm_flush - flush the GPU TLB for a VMID
 *
 * Emits the common GMC TLB-flush sequence for @vmid/@pd_addr, then a
 * WAIT_REG_MEM read of VM_INVALIDATE_REQUEST to order the invalidate.
 * On gfx rings a PFP_SYNC_ME follows so the PFP does not run ahead
 * with stale translations; compute rings have no PFP.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                        unsigned vmid, uint64_t pd_addr)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

        amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

        /* wait for the invalidate to complete */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
                                 WAIT_REG_MEM_ENGINE(0))); /* me */
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 0); /* ref */
        amdgpu_ring_write(ring, 0); /* mask */
        amdgpu_ring_write(ring, 0x20); /* poll interval */

        /* compute doesn't have PFP */
        if (usepfp) {
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                amdgpu_ring_write(ring, 0x0);
        }
}
6243
6244 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6245 {
6246         return ring->adev->wb.wb[ring->wptr_offs];
6247 }
6248
/*
 * gfx_v8_0_ring_set_wptr_compute - commit the compute ring write pointer
 *
 * Mirrors the new wptr into the writeback slot first, then rings the
 * doorbell so the CP picks up the freshly written commands.
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        /* XXX check if swapping is necessary on BE */
        adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
        WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
6257
/*
 * gfx_v8_0_ring_set_pipe_percent - throttle or restore a pipe's SPI share
 *
 * Writes the SPI_WCL_PIPE_PERCENT_* register for the pipe backing @ring:
 * full bandwidth (field max) when @acquire is true, a minimal share
 * (0x1) otherwise.
 */
static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
                                           bool acquire)
{
        struct amdgpu_device *adev = ring->adev;
        int pipe_num, tmp, reg;
        int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

        pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

        /* first me only has 2 entries, GFX and HP3D */
        if (ring->me > 0)
                pipe_num -= 2;

        /* registers for the per-pipe percentages are laid out consecutively */
        reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
        tmp = RREG32(reg);
        tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
        WREG32(reg, tmp);
}
6276
/*
 * gfx_v8_0_pipe_reserve_resources - track exclusive pipe reservations
 *
 * Sets or clears @ring's pipe in pipe_reserve_bitmap (under
 * pipe_reserve_mutex) and rebalances SPI pipe percentages: if no pipe
 * holds a reservation everyone runs at full rate, otherwise only
 * reserved pipes keep full bandwidth and all others are throttled.
 */
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
                                            struct amdgpu_ring *ring,
                                            bool acquire)
{
        int i, pipe;
        bool reserve;
        struct amdgpu_ring *iring;

        mutex_lock(&adev->gfx.pipe_reserve_mutex);
        pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
        if (acquire)
                set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
        else
                clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

        if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
                /* Clear all reservations - everyone reacquires all resources */
                for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
                        gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
                                                       true);

                for (i = 0; i < adev->gfx.num_compute_rings; ++i)
                        gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
                                                       true);
        } else {
                /* Lower all pipes without a current reservation */
                for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
                        iring = &adev->gfx.gfx_ring[i];
                        pipe = amdgpu_gfx_mec_queue_to_bit(adev,
                                                           iring->me,
                                                           iring->pipe,
                                                           0);
                        reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
                        gfx_v8_0_ring_set_pipe_percent(iring, reserve);
                }

                for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
                        iring = &adev->gfx.compute_ring[i];
                        pipe = amdgpu_gfx_mec_queue_to_bit(adev,
                                                           iring->me,
                                                           iring->pipe,
                                                           0);
                        reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
                        gfx_v8_0_ring_set_pipe_percent(iring, reserve);
                }
        }

        mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
6326
/*
 * gfx_v8_0_hqd_set_priority - program the HQD pipe/queue priority
 *
 * Selects @ring's queue via SRBM (under srbm_mutex, so must be called
 * from sleepable context) and writes elevated (acquire) or default
 * (release) CP_HQD_PIPE_PRIORITY / CP_HQD_QUEUE_PRIORITY values, then
 * restores the SRBM selection.
 */
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
                                      struct amdgpu_ring *ring,
                                      bool acquire)
{
        uint32_t pipe_priority = acquire ? 0x2 : 0x0;
        uint32_t queue_priority = acquire ? 0xf : 0x0;

        mutex_lock(&adev->srbm_mutex);
        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

        WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
        WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

        vi_srbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);
}
6343 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6344                                                enum drm_sched_priority priority)
6345 {
6346         struct amdgpu_device *adev = ring->adev;
6347         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6348
6349         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6350                 return;
6351
6352         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6353         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6354 }
6355
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence packet on a compute ring
 *
 * Compute counterpart of gfx_v8_0_ring_emit_fence_gfx(): uses
 * RELEASE_MEM instead of EVENT_WRITE_EOP (note the different dword
 * order - the DATA_SEL/INT_SEL word precedes the address here).
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
                                             u64 addr, u64 seq,
                                             unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* RELEASE_MEM - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));
}
6376
/*
 * gfx_v8_0_ring_emit_fence_kiq - emit a fence packet on the KIQ ring
 *
 * Writes the (32-bit only) fence sequence to @addr via WRITE_DATA and,
 * when AMDGPU_FENCE_FLAG_INT is set, pokes CPC_INT_STATUS to raise a
 * general-purpose interrupt (src_id 178).
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned int flags)
{
        /* we only allocate 32bit for each seq wb address */
        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        /* write fence seq to the "addr" */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));

        if (flags & AMDGPU_FENCE_FLAG_INT) {
                /* set register to trigger INT */
                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
                amdgpu_ring_write(ring, mmCPC_INT_STATUS);
                amdgpu_ring_write(ring, 0);
                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
        }
}
6401
/* Emit a SWITCH_BUFFER packet to flip the CE/DE double buffers. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
        amdgpu_ring_write(ring, 0);
}
6407
/*
 * gfx_v8_ring_emit_cntxcntl - emit CONTEXT_CONTROL state-load flags
 *
 * Builds the dw2 load mask from @flags (context switch and preamble
 * presence) and emits the packet.  Under SR-IOV the CE metadata must
 * be emitted first.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
        uint32_t dw2 = 0;

        if (amdgpu_sriov_vf(ring->adev))
                gfx_v8_0_ring_emit_ce_meta(ring);

        dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
                gfx_v8_0_ring_emit_vgt_flush(ring);
                /* set load_global_config & load_global_uconfig */
                dw2 |= 0x8001;
                /* set load_cs_sh_regs */
                dw2 |= 0x01000000;
                /* set load_per_context_state & load_gfx_sh_regs for GFX */
                dw2 |= 0x10002;

                /* set load_ce_ram if preamble presented */
                if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
                        dw2 |= 0x10000000;
        } else {
                /* still load_ce_ram if this is the first time preamble presented
                 * although there is no context switch happens.
                 */
                if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
                        dw2 |= 0x10000000;
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, dw2);
        amdgpu_ring_write(ring, 0);
}
6440
/*
 * gfx_v8_0_ring_emit_init_cond_exec - open a conditional-execution block
 *
 * Emits a COND_EXEC packet referencing cond_exe_gpu_addr with a dummy
 * skip count (0x55aa55aa) and returns the ring offset of that dummy
 * dword so gfx_v8_0_ring_emit_patch_cond_exec() can patch in the real
 * count later.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
        unsigned ret;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
        amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
        ret = ring->wptr & ring->buf_mask;
        amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
        return ret;
}
6453
6454 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6455 {
6456         unsigned cur;
6457
6458         BUG_ON(offset > ring->buf_mask);
6459         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6460
6461         cur = (ring->wptr & ring->buf_mask) - 1;
6462         if (likely(cur > offset))
6463                 ring->ring[offset] = cur - offset;
6464         else
6465                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6466 }
6467
/*
 * gfx_v8_0_ring_emit_rreg - emit a register read via the ring
 *
 * Emits a COPY_DATA packet that copies register @reg into the
 * writeback slot at virt.reg_val_offs, where the host can then read
 * the value (used for register access under SR-IOV).
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
        struct amdgpu_device *adev = ring->adev;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 0 |     /* src: register*/
                                (5 << 8) |      /* dst: memory */
                                (1 << 20));     /* write confirm */
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
}
6483
/*
 * gfx_v8_0_ring_emit_wreg - emit a register write via the ring
 *
 * Emits a WRITE_DATA packet writing @val to register @reg, with the
 * engine/confirm selects chosen per ring type (PFP for gfx, no address
 * increment for KIQ, plain write-confirm otherwise).
 */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
                                  uint32_t val)
{
        uint32_t cmd;

        switch (ring->funcs->type) {
        case AMDGPU_RING_TYPE_GFX:
                cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
                break;
        case AMDGPU_RING_TYPE_KIQ:
                cmd = 1 << 16; /* no inc addr */
                break;
        default:
                cmd = WR_CONFIRM;
                break;
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, cmd);
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, val);
}
6507
6508 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6509 {
6510         struct amdgpu_device *adev = ring->adev;
6511         uint32_t value = 0;
6512
6513         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6514         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6515         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6516         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6517         WREG32(mmSQ_CMD, value);
6518 }
6519
/* Enable/disable the gfx ring's end-of-pipe (timestamp) interrupt. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
                                                 enum amdgpu_interrupt_state state)
{
        WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6526
/*
 * gfx_v8_0_set_compute_eop_interrupt_state - toggle a compute pipe's
 * end-of-pipe interrupt
 *
 * Resolves the CP_ME1_PIPEn_INT_CNTL register for @me/@pipe and
 * read-modify-writes its TIME_STAMP_INT_ENABLE bit per @state.
 */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
                                                     int me, int pipe,
                                                     enum amdgpu_interrupt_state state)
{
        u32 mec_int_cntl, mec_int_cntl_reg;

        /*
         * amdgpu controls only the first MEC. That's why this function only
         * handles the setting of interrupts for this specific MEC. All other
         * pipes' interrupts are set by amdkfd.
         */

        if (me == 1) {
                switch (pipe) {
                case 0:
                        mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
                        break;
                case 1:
                        mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
                        break;
                case 2:
                        mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
                        break;
                case 3:
                        mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
                        break;
                default:
                        DRM_DEBUG("invalid pipe %d\n", pipe);
                        return;
                }
        } else {
                DRM_DEBUG("invalid me %d\n", me);
                return;
        }

        switch (state) {
        case AMDGPU_IRQ_STATE_DISABLE:
                mec_int_cntl = RREG32(mec_int_cntl_reg);
                mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
                WREG32(mec_int_cntl_reg, mec_int_cntl);
                break;
        case AMDGPU_IRQ_STATE_ENABLE:
                mec_int_cntl = RREG32(mec_int_cntl_reg);
                mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
                WREG32(mec_int_cntl_reg, mec_int_cntl);
                break;
        default:
                break;
        }
}
6577
/* Enable/disable the privileged-register-access fault interrupt. */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
                                             struct amdgpu_irq_src *source,
                                             unsigned type,
                                             enum amdgpu_interrupt_state state)
{
        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

        return 0;
}
6588
/* Enable/disable the privileged/illegal-instruction fault interrupt. */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
                                              struct amdgpu_irq_src *source,
                                              unsigned type,
                                              enum amdgpu_interrupt_state state)
{
        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

        return 0;
}
6599
/*
 * gfx_v8_0_set_eop_interrupt_state - irq_src hook for EOP interrupts
 *
 * Routes the enable/disable request to the gfx ring handler or to the
 * matching MEC/pipe handler based on the interrupt @type.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
                                            struct amdgpu_irq_src *src,
                                            unsigned type,
                                            enum amdgpu_interrupt_state state)
{
        switch (type) {
        case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
                gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
                break;
        default:
                break;
        }
        return 0;
}
6638
/*
 * gfx_v8_0_set_cp_ecc_int_state - toggle CP ECC-error interrupts
 *
 * Applies the enable flag to every CP/CPC interrupt-control register:
 * the global CP_INT_CNTL, all three ring CNTLs, CPC_INT_CNTL and all
 * ME1/ME2 per-pipe CNTLs.  Returns -EINVAL for unknown states.
 */
static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
                                         struct amdgpu_irq_src *source,
                                         unsigned int type,
                                         enum amdgpu_interrupt_state state)
{
        int enable_flag;

        switch (state) {
        case AMDGPU_IRQ_STATE_DISABLE:
                enable_flag = 0;
                break;

        case AMDGPU_IRQ_STATE_ENABLE:
                enable_flag = 1;
                break;

        default:
                return -EINVAL;
        }

        WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
        WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
        WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
        WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
        WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
        WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
                     enable_flag);
        WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
                     enable_flag);
        WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
                     enable_flag);
        WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
                     enable_flag);
        WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
                     enable_flag);
        WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
                     enable_flag);
        WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
                     enable_flag);
        WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
                     enable_flag);

        return 0;
}
6683
/*
 * gfx_v8_0_set_sq_int_state - toggle SQ interrupt message delivery
 *
 * Note the deliberate inversion: "disable" sets
 * SQ_INTERRUPT_MSG_CTRL.STALL = 1 (messages stalled), "enable" clears
 * the stall so messages flow.  Returns -EINVAL for unknown states.
 */
static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
                                     struct amdgpu_irq_src *source,
                                     unsigned int type,
                                     enum amdgpu_interrupt_state state)
{
        int enable_flag;

        switch (state) {
        case AMDGPU_IRQ_STATE_DISABLE:
                enable_flag = 1;
                break;

        case AMDGPU_IRQ_STATE_ENABLE:
                enable_flag = 0;
                break;

        default:
                return -EINVAL;
        }

        WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
                     enable_flag);

        return 0;
}
6709
/*
 * gfx_v8_0_eop_irq - CP end-of-pipe interrupt handler
 *
 * Decodes the source ring from entry->ring_id (me in bits [3:2], pipe
 * in bits [1:0], queue in bits [6:4]) and signals fence completion on
 * the matching gfx or compute ring.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
                            struct amdgpu_irq_src *source,
                            struct amdgpu_iv_entry *entry)
{
        int i;
        u8 me_id, pipe_id, queue_id;
        struct amdgpu_ring *ring;

        DRM_DEBUG("IH: CP EOP\n");
        me_id = (entry->ring_id & 0x0c) >> 2;
        pipe_id = (entry->ring_id & 0x03) >> 0;
        queue_id = (entry->ring_id & 0x70) >> 4;

        switch (me_id) {
        case 0:
                /* me 0 is the gfx engine; only one gfx ring */
                amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
                break;
        case 1:
        case 2:
                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        ring = &adev->gfx.compute_ring[i];
                        /* Per-queue interrupt is supported for MEC starting from VI.
                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
                          */
                        if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
                                amdgpu_fence_process(ring);
                }
                break;
        }
        return 0;
}
6741
/*
 * gfx_v8_0_fault - forward a CP fault to the DRM scheduler
 *
 * Decodes the faulting ring from entry->ring_id (same bit layout as
 * gfx_v8_0_eop_irq()) and calls drm_sched_fault() on it so the
 * scheduler's timeout/recovery path runs.
 */
static void gfx_v8_0_fault(struct amdgpu_device *adev,
                           struct amdgpu_iv_entry *entry)
{
        u8 me_id, pipe_id, queue_id;
        struct amdgpu_ring *ring;
        int i;

        me_id = (entry->ring_id & 0x0c) >> 2;
        pipe_id = (entry->ring_id & 0x03) >> 0;
        queue_id = (entry->ring_id & 0x70) >> 4;

        switch (me_id) {
        case 0:
                drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
                break;
        case 1:
        case 2:
                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        ring = &adev->gfx.compute_ring[i];
                        if (ring->me == me_id && ring->pipe == pipe_id &&
                            ring->queue == queue_id)
                                drm_sched_fault(&ring->sched);
                }
                break;
        }
}
6768
/* Handle a privileged-register-access fault: log it and fault the ring. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");
        gfx_v8_0_fault(adev, entry);
        return 0;
}
6777
/* Handle an illegal-instruction fault: log it and fault the ring. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");
        gfx_v8_0_fault(adev, entry);
        return 0;
}
6786
/*
 * gfx_v8_0_cp_ecc_error_irq - CP EDC/ECC error interrupt handler
 *
 * Only logs the event; no recovery action is taken here.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
                                     struct amdgpu_irq_src *source,
                                     struct amdgpu_iv_entry *entry)
{
        /* terminate with '\n' per printk convention so the line flushes */
        DRM_ERROR("CP EDC/ECC error detected.\n");
        return 0;
}
6794
6795 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6796 {
6797         u32 enc, se_id, sh_id, cu_id;
6798         char type[20];
6799         int sq_edc_source = -1;
6800
6801         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6802         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6803
6804         switch (enc) {
6805                 case 0:
6806                         DRM_INFO("SQ general purpose intr detected:"
6807                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6808                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6809                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6810                                         "wlt %d, thread_trace %d.\n",
6811                                         se_id,
6812                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6813                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6814                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6815                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6816                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6817                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6818                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6819                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6820                                         );
6821                         break;
6822                 case 1:
6823                 case 2:
6824
6825                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6826                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6827
6828                         /*
6829                          * This function can be called either directly from ISR
6830                          * or from BH in which case we can access SQ_EDC_INFO
6831                          * instance
6832                          */
6833                         if (in_task()) {
6834                                 mutex_lock(&adev->grbm_idx_mutex);
6835                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6836
6837                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6838
6839                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6840                                 mutex_unlock(&adev->grbm_idx_mutex);
6841                         }
6842
6843                         if (enc == 1)
6844                                 sprintf(type, "instruction intr");
6845                         else
6846                                 sprintf(type, "EDC/ECC error");
6847
6848                         DRM_INFO(
6849                                 "SQ %s detected: "
6850                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6851                                         "trap %s, sq_ed_info.source %s.\n",
6852                                         type, se_id, sh_id, cu_id,
6853                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6854                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6855                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6856                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6857                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6858                                 );
6859                         break;
6860                 default:
6861                         DRM_ERROR("SQ invalid encoding type\n.");
6862         }
6863 }
6864
/*
 * gfx_v8_0_sq_irq_work_func - bottom half for SQ interrupts
 *
 * Runs in task context so gfx_v8_0_parse_sq_irq() can safely take
 * grbm_idx_mutex and read SQ_EDC_INFO for the stashed ih_data.
 */
static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{

        struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
        struct sq_work *sq_work = container_of(work, struct sq_work, work);

        gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
}
6873
/*
 * gfx_v8_0_sq_irq - SQ interrupt top half
 *
 * Prefers to defer parsing to the sq_work bottom half (where
 * SQ_EDC_INFO can be read); if that work is still pending from a
 * previous interrupt, parses directly in ISR context with reduced
 * information instead of dropping the event.
 */
static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
                           struct amdgpu_irq_src *source,
                           struct amdgpu_iv_entry *entry)
{
        unsigned ih_data = entry->src_data[0];

        /*
         * Try to submit work so SQ_EDC_INFO can be accessed from
         * BH. If previous work submission hasn't finished yet
         * just print whatever info is possible directly from the ISR.
         */
        if (work_pending(&adev->gfx.sq_work.work)) {
                gfx_v8_0_parse_sq_irq(adev, ih_data);
        } else {
                adev->gfx.sq_work.ih_data = ih_data;
                schedule_work(&adev->gfx.sq_work.work);
        }

        return 0;
}
6894
/* IP-block callbacks for the GFX v8 block: lifecycle (init/fini,
 * suspend/resume), reset handling and clock/power gating control. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
        .name = "gfx_v8_0",
        .early_init = gfx_v8_0_early_init,
        .late_init = gfx_v8_0_late_init,
        .sw_init = gfx_v8_0_sw_init,
        .sw_fini = gfx_v8_0_sw_fini,
        .hw_init = gfx_v8_0_hw_init,
        .hw_fini = gfx_v8_0_hw_fini,
        .suspend = gfx_v8_0_suspend,
        .resume = gfx_v8_0_resume,
        .is_idle = gfx_v8_0_is_idle,
        .wait_for_idle = gfx_v8_0_wait_for_idle,
        .check_soft_reset = gfx_v8_0_check_soft_reset,
        .pre_soft_reset = gfx_v8_0_pre_soft_reset,
        .soft_reset = gfx_v8_0_soft_reset,
        .post_soft_reset = gfx_v8_0_post_soft_reset,
        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
        .set_powergating_state = gfx_v8_0_set_powergating_state,
        .get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6915
/*
 * Ring callbacks for the graphics (GFX) ring.  emit_frame_size is the
 * worst-case number of dwords a single submission can add around the IBs;
 * the per-item comments account for each packet group.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
};
6960
/*
 * Ring callbacks for the compute (MEC) rings.  Shares rptr/fence/vm-flush
 * helpers with the GFX ring but uses the compute wptr accessors and a
 * smaller worst-case frame size (no CE/DE metadata or context control).
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6990
/*
 * Ring callbacks for the KIQ (kernel interface queue).  The KIQ is a
 * driver-internal control queue: it provides register read/write packet
 * emitters (emit_rreg/emit_wreg) and its own fence, but no IB submission
 * or VM-flush callbacks.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7014
7015 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7016 {
7017         int i;
7018
7019         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7020
7021         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7022                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7023
7024         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7025                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7026 }
7027
/* End-of-pipe interrupt: per-ring fence/completion notifications. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
7032
/* Privileged register access fault interrupt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
7037
/* Privileged instruction fault interrupt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
7042
/* CP ECC error interrupt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};
7047
/* SQ (shader sequencer) interrupt; handler defers to a workqueue above. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};
7052
7053 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7054 {
7055         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7056         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7057
7058         adev->gfx.priv_reg_irq.num_types = 1;
7059         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7060
7061         adev->gfx.priv_inst_irq.num_types = 1;
7062         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7063
7064         adev->gfx.cp_ecc_error_irq.num_types = 1;
7065         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7066
7067         adev->gfx.sq_irq.num_types = 1;
7068         adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7069 }
7070
/*
 * Install the RLC (run list controller) callback table.
 * NOTE(review): the table is named after Iceland/Topaz but is installed
 * unconditionally here for every chip this file drives — presumably the
 * RLC interface is shared across VI variants; confirm against the table's
 * definition earlier in the file.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7075
/*
 * Initialize the ASIC GDS (global data share) info: total GDS size is
 * read from hardware, GWS/OA sizes are fixed for this generation.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
}
7084
7085 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7086                                                  u32 bitmap)
7087 {
7088         u32 data;
7089
7090         if (!bitmap)
7091                 return;
7092
7093         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7094         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7095
7096         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7097 }
7098
7099 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7100 {
7101         u32 data, mask;
7102
7103         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7104                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7105
7106         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7107
7108         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7109 }
7110
/*
 * Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active CU count, the "always on" CU mask, and fixed per-CU limits for
 * this generation.  Walks every SE/SH under grbm_idx_mutex because it
 * reprograms the GRBM index to read per-SH registers.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the always-on CU count at 2; dGPUs allow a full SH. */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	/* Parse the module-parameter CU-disable masks for up to 4 SEs x 2 SHs. */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* Target this SE/SH for the register accesses below. */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* Apply user disables only where a parsed mask exists. */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* Count active CUs; the first ao_cu_num become always-on. */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* ao_cu_mask packs 8 bits per SH for the first 2 SEs x 2 SHs. */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* Restore broadcast (all SE/SH) register addressing. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
7166
/* IP block descriptor for GFX 8.0 ASICs; exported to the SoC setup code. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7175
/* IP block descriptor for GFX 8.1 ASICs; shares the 8.0 callback table. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7184
7185 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7186 {
7187         uint64_t ce_payload_addr;
7188         int cnt_ce;
7189         union {
7190                 struct vi_ce_ib_state regular;
7191                 struct vi_ce_ib_state_chained_ib chained;
7192         } ce_payload = {};
7193
7194         if (ring->adev->virt.chained_ib_support) {
7195                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7196                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7197                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7198         } else {
7199                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7200                         offsetof(struct vi_gfx_meta_data, ce_payload);
7201                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7202         }
7203
7204         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7205         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7206                                 WRITE_DATA_DST_SEL(8) |
7207                                 WR_CONFIRM) |
7208                                 WRITE_DATA_CACHE_POLICY(0));
7209         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7210         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7211         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7212 }
7213
/*
 * Emit a WRITE_DATA packet that initializes the DE metadata in the
 * per-ring CSA.  Unlike the CE variant, the payload is not all-zero: it
 * records the GDS backup address (placed 4KiB into the CSA) before being
 * written out.  Layout again depends on chained-IB support.
 */
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	/* GDS backup area lives one page past the CSA base. */
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		/* payload dwords plus the two address dwords of the packet */
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}