drm/amdgpu: implement more ib pools (v2)
[linux-2.6-microblaze.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "vi.h"
33 #include "vi_structs.h"
34 #include "vid.h"
35 #include "amdgpu_ucode.h"
36 #include "amdgpu_atombios.h"
37 #include "atombios_i2c.h"
38 #include "clearstate_vi.h"
39
40 #include "gmc/gmc_8_2_d.h"
41 #include "gmc/gmc_8_2_sh_mask.h"
42
43 #include "oss/oss_3_0_d.h"
44 #include "oss/oss_3_0_sh_mask.h"
45
46 #include "bif/bif_5_0_d.h"
47 #include "bif/bif_5_0_sh_mask.h"
48 #include "gca/gfx_8_0_d.h"
49 #include "gca/gfx_8_0_enum.h"
50 #include "gca/gfx_8_0_sh_mask.h"
51
52 #include "dce/dce_10_0_d.h"
53 #include "dce/dce_10_0_sh_mask.h"
54
55 #include "smu/smu_7_1_3_d.h"
56
57 #include "ivsrcid/ivsrcid_vislands30.h"
58
/* Ring/queue sizing for the GFX8 block. */
59 #define GFX8_NUM_GFX_RINGS     1
60 #define GFX8_MEC_HPD_SIZE 4096
61
/* Golden GB_ADDR_CONFIG values, one per ASIC family. */
62 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
63 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
64 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
65 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
66
/*
 * Helpers that shift a field value into position for the GB_TILE_MODE0 /
 * GB_MACROTILE_MODE0 register layouts (tiling configuration).
 */
67 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
68 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
69 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
70 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
71 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
72 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
73 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
74 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
75 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
76
/* Per-block override bits in the RLC_CGTT_MGCG_OVERRIDE register. */
77 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
78 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
79 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
80 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
81 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
82 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
83
84 /* BPM SERDES CMD */
85 #define SET_BPM_SERDES_CMD    1
86 #define CLE_BPM_SERDES_CMD    0
87
88 /* BPM Register Address*/
89 enum {
90         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
91         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
92         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
93         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
94         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
95         BPM_REG_FGCG_MAX
96 };
97
/*
 * NOTE(review): presumably the entry count of the RLC firmware's "direct
 * register list" format — confirm against the RLC ucode interface before
 * relying on it.
 */
98 #define RLC_FormatDirectRegListLength        14
99
/*
 * Firmware images used by this IP block, one set per supported ASIC:
 * CE/PFP/ME (gfx micro engines), MEC/MEC2 (compute micro engines) and RLC.
 * MODULE_FIRMWARE() only declares the dependency in the module info so
 * userspace tooling can bundle the files; the actual request/load happens
 * at init time elsewhere in this file.
 */
100 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
101 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
103 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
104 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
105 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
108 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
110 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
111 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
114 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
116 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
117 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
118 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
121 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
123 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
124 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
125
126 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
127 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
129 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
130 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
132
/* Polaris ASICs additionally carry "_2" variants of each image. */
133 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
143 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
144
145 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
153 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
155 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
156
157 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
165 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
166 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
167 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
168
169 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
170 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
172 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
173 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
174 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
175
/*
 * MMIO offsets of the per-VMID GDS registers ({base, size, GWS, OA}),
 * indexed by VMID 0..15.
 */
176 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
177 {
178         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
179         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
180         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
181         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
182         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
183         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
184         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
185         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
186         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
187         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
188         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
189         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
190         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
191         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
192         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
193         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
194 };
195
/*
 * Golden register fixups for Tonga A11.  Flat u32 array of {reg offset,
 * mask, value} triples.  NOTE(review): presumably consumed by
 * amdgpu_device_program_register_sequence() as masked read-modify-write
 * updates — confirm at the call site.
 */
196 static const u32 golden_settings_tonga_a11[] =
197 {
198         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
199         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
200         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
201         mmGB_GPU_ID, 0x0000000f, 0x00000000,
202         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
203         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
204         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
205         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
206         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
207         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
208         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
209         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
210         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
211         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
212         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
213         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
214 };
215
/* Common golden settings for all Tonga variants (same triple layout). */
216 static const u32 tonga_golden_common_all[] =
217 {
218         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
219         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
220         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
221         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
222         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
223         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
224         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
225         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
226 };
227
/*
 * MGCG/CGCG clock-gating init sequence for Tonga.  Order is significant:
 * each GRBM_GFX_INDEX write (0xe0000000, presumably a broadcast/select-all
 * index) must precede the per-block CGTT and per-CU CGTS writes it scopes.
 */
228 static const u32 tonga_mgcg_cgcg_init[] =
229 {
230         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
231         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
232         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
233         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
234         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
236         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
237         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
238         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
239         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
240         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
241         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
242         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
244         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
245         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
246         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
247         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
248         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
249         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
250         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
251         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
252         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
253         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
254         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
255         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
256         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
257         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
258         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
259         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
260         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
/* Per-CU fine-grain clock-gating setup, CU0 through CU7. */
261         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
262         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
263         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
264         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
265         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
266         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
267         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
268         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
269         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
270         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
271         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
272         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
273         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
274         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
275         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
276         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
277         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
278         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
279         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
280         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
281         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
282         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
283         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
284         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
285         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
286         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
287         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
288         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
289         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
290         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
291         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
292         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
293         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
294         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
295         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
296         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
297         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
298         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
299         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
300         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
301         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
302         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
303         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
304         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
305 };
306
/*
 * Golden register fixups for VegaM: flat u32 array of {reg offset, mask,
 * value} triples (NOTE(review): presumably applied as masked
 * read-modify-write by the register-sequence helper — confirm at call site).
 */
307 static const u32 golden_settings_vegam_a11[] =
308 {
309         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
310         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
311         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
312         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
313         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
314         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
315         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
316         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
317         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
318         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
319         mmSQ_CONFIG, 0x07f80000, 0x01180000,
320         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
321         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
322         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
323         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
324         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
325         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
326 };
327
/* Common golden settings for VegaM (same triple layout as above). */
328 static const u32 vegam_golden_common_all[] =
329 {
330         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
331         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
332         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
333         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
334         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
335         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
336 };
337
/*
 * Golden register fixups for Polaris11: {reg offset, mask, value} triples
 * (NOTE(review): presumably applied as masked read-modify-write by the
 * register-sequence helper — confirm at call site).
 */
338 static const u32 golden_settings_polaris11_a11[] =
339 {
340         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
341         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
342         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
343         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
344         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
345         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
346         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
347         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
348         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
349         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
350         mmSQ_CONFIG, 0x07f80000, 0x01180000,
351         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
352         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
353         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
354         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
355         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
356         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
357 };
358
/* Common golden settings for Polaris11 (same triple layout as above). */
359 static const u32 polaris11_golden_common_all[] =
360 {
361         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
362         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
363         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
364         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
365         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
366         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
367 };
368
/*
 * Golden register fixups for Polaris10: {reg offset, mask, value} triples
 * (NOTE(review): presumably applied as masked read-modify-write by the
 * register-sequence helper — confirm at call site).
 */
369 static const u32 golden_settings_polaris10_a11[] =
370 {
371         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
372         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
373         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
374         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
375         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
376         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
377         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
378         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
379         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
380         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
381         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
382         mmSQ_CONFIG, 0x07f80000, 0x07180000,
383         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
384         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
385         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
386         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
387         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
388 };
389
/* Common golden settings for Polaris10 (same triple layout as above). */
390 static const u32 polaris10_golden_common_all[] =
391 {
392         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
393         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
394         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
395         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
396         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
397         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
398         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
399         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
400 };
401
/*
 * Common golden settings for Fiji: {reg offset, mask, value} triples
 * (NOTE(review): presumably applied as masked read-modify-write by the
 * register-sequence helper — confirm at call site).  The second
 * GRBM_GFX_INDEX write restores the broadcast index before the final
 * SPI_CONFIG_CNTL_1 update; entry order matters.
 */
402 static const u32 fiji_golden_common_all[] =
403 {
404         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
405         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
406         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
407         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
408         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
409         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
410         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
411         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
412         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
413         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
414 };
415
/* Golden register fixups for Fiji A10 (same triple layout as above). */
416 static const u32 golden_settings_fiji_a10[] =
417 {
418         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
419         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
420         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
421         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
422         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
423         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
424         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
425         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
426         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
427         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
428         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
429 };
430
/*
 * MGCG/CGCG clock-gating init sequence for Fiji.  Unlike the Tonga/CZ
 * variants it carries no per-CU CGTS entries.  Entry order matters: the
 * GRBM_GFX_INDEX writes scope the per-block CGTT writes that follow.
 */
431 static const u32 fiji_mgcg_cgcg_init[] =
432 {
433         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
434         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
435         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
436         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
440         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
442         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
444         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
452         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
453         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
455         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
456         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
458         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
459         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
460         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
461         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
462         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
463         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
464         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
465         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
466         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
467         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
468 };
469
/*
 * Golden register fixups for Iceland/Topaz A11: {reg offset, mask, value}
 * triples (NOTE(review): presumably applied as masked read-modify-write by
 * the register-sequence helper — confirm at call site).
 */
470 static const u32 golden_settings_iceland_a11[] =
471 {
472         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
473         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
474         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
475         mmGB_GPU_ID, 0x0000000f, 0x00000000,
476         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
477         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
478         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
479         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
480         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
481         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
482         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
483         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
484         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
485         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
486         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
487         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
488 };
489
/* Common golden settings for Iceland (same triple layout as above). */
490 static const u32 iceland_golden_common_all[] =
491 {
492         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
493         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
494         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
495         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
496         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
497         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
498         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
499         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
500 };
501
/*
 * MGCG/CGCG clock-gating init sequence for Iceland.  Only CU0..CU5 get
 * per-CU CGTS entries here (vs. CU0..CU7 on Tonga/CZ).  Entry order
 * matters: the GRBM_GFX_INDEX writes scope the writes that follow.
 */
502 static const u32 iceland_mgcg_cgcg_init[] =
503 {
504         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
505         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
506         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
507         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
508         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
509         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
510         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
511         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
512         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
513         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
514         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
515         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
516         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
518         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
519         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
520         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
521         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
522         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
523         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
524         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
525         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
526         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
527         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
528         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
529         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
530         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
531         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
532         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
533         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
534         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
535         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
536         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
537         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
538         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
539         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
540         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
541         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
542         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
543         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
544         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
545         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
546         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
547         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
548         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
549         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
550         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
551         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
552         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
553         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
554         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
555         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
558         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
561         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
562         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
563         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
564         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
565         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
566         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
567         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
568 };
569
/*
 * Golden register fixups for Carrizo A11: {reg offset, mask, value} triples
 * (NOTE(review): presumably applied as masked read-modify-write by the
 * register-sequence helper — confirm at call site).
 */
570 static const u32 cz_golden_settings_a11[] =
571 {
572         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
573         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
574         mmGB_GPU_ID, 0x0000000f, 0x00000000,
575         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
576         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
577         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
578         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
579         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
580         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
581         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
582         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
583         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
584 };
585
/* Common golden settings for Carrizo (same triple layout as above). */
586 static const u32 cz_golden_common_all[] =
587 {
588         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
589         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
590         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
591         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
592         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
593         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
594         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
595         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
596 };
597
598 static const u32 cz_mgcg_cgcg_init[] =
599 {
600         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
601         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
602         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
606         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
608         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
609         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
610         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
611         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
615         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
616         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
617         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
619         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
620         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
621         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
622         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
623         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
624         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
625         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
626         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
627         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
628         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
629         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
630         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
631         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
632         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
633         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
634         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
635         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
636         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
637         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
638         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
639         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
640         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
641         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
642         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
643         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
644         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
645         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
646         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
647         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
648         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
649         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
650         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
651         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
652         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
653         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
654         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
655         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
656         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
657         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
658         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
659         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
660         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
661         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
662         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
663         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
664         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
665         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
666         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
667         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
668         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
669         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
670         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
671         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
672         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
673         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
674         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
675 };
676
/* Stoney (APU) golden register settings, consumed by
 * amdgpu_device_program_register_sequence() from
 * gfx_v8_0_init_golden_registers().
 * NOTE(review): entries appear to be {offset, mask, value} triplets —
 * confirm against amdgpu_device_program_register_sequence().
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
690
/* Stoney common golden registers (GRBM broadcast index, raster config,
 * address config, SPI CU resource reservations), consumed by
 * amdgpu_device_program_register_sequence() from
 * gfx_v8_0_init_golden_registers().
 * NOTE(review): entries appear to be {offset, mask, value} triplets —
 * confirm against amdgpu_device_program_register_sequence().
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
702
/* Stoney medium-grain / coarse-grain clock-gating (MGCG/CGCG) init
 * values, consumed by amdgpu_device_program_register_sequence() from
 * gfx_v8_0_init_golden_registers().
 * NOTE(review): entries appear to be {offset, mask, value} triplets —
 * confirm against amdgpu_device_program_register_sequence().
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
711
712
/* Human-readable names for SQ EDC (error detection and correction)
 * error sources.
 * NOTE(review): presumably indexed by the SQ_EDC_INFO SOURCE field —
 * the consumer is outside this chunk; verify the ordering there.
 */
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
722
723 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
724 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
725 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
726 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
727 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
728 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
729 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
730 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
731
/**
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 * @adev: amdgpu device pointer
 *
 * Applies the ASIC-specific clock-gating init, golden-setting and common
 * register tables via amdgpu_device_program_register_sequence().
 * Unrecognized ASIC types are left untouched.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific I2C writes for a handful of revision 0xc7
		 * Polaris10 boards (matched by subsystem vendor/device).
		 * NOTE(review): purpose not visible here — looks like a
		 * board/VBIOS workaround; confirm before touching.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
828
829 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
830 {
831         adev->gfx.scratch.num_reg = 8;
832         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
833         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
834 }
835
836 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
837 {
838         struct amdgpu_device *adev = ring->adev;
839         uint32_t scratch;
840         uint32_t tmp = 0;
841         unsigned i;
842         int r;
843
844         r = amdgpu_gfx_scratch_get(adev, &scratch);
845         if (r)
846                 return r;
847
848         WREG32(scratch, 0xCAFEDEAD);
849         r = amdgpu_ring_alloc(ring, 3);
850         if (r)
851                 goto error_free_scratch;
852
853         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
854         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
855         amdgpu_ring_write(ring, 0xDEADBEEF);
856         amdgpu_ring_commit(ring);
857
858         for (i = 0; i < adev->usec_timeout; i++) {
859                 tmp = RREG32(scratch);
860                 if (tmp == 0xDEADBEEF)
861                         break;
862                 udelay(1);
863         }
864
865         if (i >= adev->usec_timeout)
866                 r = -ETIMEDOUT;
867
868 error_free_scratch:
869         amdgpu_gfx_scratch_free(adev, scratch);
870         return r;
871 }
872
/**
 * gfx_v8_0_ring_test_ib - sanity-check indirect buffer (IB) execution
 * @ring: ring to test
 * @timeout: fence wait timeout, in jiffies
 *
 * Builds a small IB containing a single WRITE_DATA packet that stores
 * 0xDEADBEEF to a writeback slot, schedules it, waits for its fence and
 * then verifies that the write actually landed.  Returns 0 on success,
 * -ETIMEDOUT if the fence never signaled, -EINVAL if the fence signaled
 * but the value did not arrive, or a negative error from the
 * allocation/submission paths.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	/* grab a writeback slot to serve as the IB's memory target */
	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	/* seed the slot so we can tell whether the IB's write landed */
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	/* 16 dwords from the direct pool is plenty for the 5-dword packet */
	r = amdgpu_ib_get(adev, NULL, 16,
					AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	/* WRITE_DATA: 0xDEADBEEF -> gpu_addr, with write confirmation.
	 * NOTE(review): DST_SEL(5) selects a memory destination per the
	 * VI PM4 packet format — confirm against the spec.
	 */
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	/* dma_fence_wait_timeout(): 0 = timed out, <0 = error,
	 * >0 = signaled with jiffies to spare */
	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}
928
929
930 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
931 {
932         release_firmware(adev->gfx.pfp_fw);
933         adev->gfx.pfp_fw = NULL;
934         release_firmware(adev->gfx.me_fw);
935         adev->gfx.me_fw = NULL;
936         release_firmware(adev->gfx.ce_fw);
937         adev->gfx.ce_fw = NULL;
938         release_firmware(adev->gfx.rlc_fw);
939         adev->gfx.rlc_fw = NULL;
940         release_firmware(adev->gfx.mec_fw);
941         adev->gfx.mec_fw = NULL;
942         if ((adev->asic_type != CHIP_STONEY) &&
943             (adev->asic_type != CHIP_TOPAZ))
944                 release_firmware(adev->gfx.mec2_fw);
945         adev->gfx.mec2_fw = NULL;
946
947         kfree(adev->gfx.rlc.register_list_format);
948 }
949
950 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
951 {
952         const char *chip_name;
953         char fw_name[30];
954         int err;
955         struct amdgpu_firmware_info *info = NULL;
956         const struct common_firmware_header *header = NULL;
957         const struct gfx_firmware_header_v1_0 *cp_hdr;
958         const struct rlc_firmware_header_v2_0 *rlc_hdr;
959         unsigned int *tmp = NULL, i;
960
961         DRM_DEBUG("\n");
962
963         switch (adev->asic_type) {
964         case CHIP_TOPAZ:
965                 chip_name = "topaz";
966                 break;
967         case CHIP_TONGA:
968                 chip_name = "tonga";
969                 break;
970         case CHIP_CARRIZO:
971                 chip_name = "carrizo";
972                 break;
973         case CHIP_FIJI:
974                 chip_name = "fiji";
975                 break;
976         case CHIP_STONEY:
977                 chip_name = "stoney";
978                 break;
979         case CHIP_POLARIS10:
980                 chip_name = "polaris10";
981                 break;
982         case CHIP_POLARIS11:
983                 chip_name = "polaris11";
984                 break;
985         case CHIP_POLARIS12:
986                 chip_name = "polaris12";
987                 break;
988         case CHIP_VEGAM:
989                 chip_name = "vegam";
990                 break;
991         default:
992                 BUG();
993         }
994
995         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
996                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
997                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
998                 if (err == -ENOENT) {
999                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1000                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1001                 }
1002         } else {
1003                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1004                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1005         }
1006         if (err)
1007                 goto out;
1008         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1009         if (err)
1010                 goto out;
1011         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1012         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1013         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1014
1015         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1016                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1017                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1018                 if (err == -ENOENT) {
1019                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1020                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1021                 }
1022         } else {
1023                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1024                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1025         }
1026         if (err)
1027                 goto out;
1028         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1029         if (err)
1030                 goto out;
1031         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1032         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1033
1034         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1035
1036         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1037                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1038                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1039                 if (err == -ENOENT) {
1040                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1041                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1042                 }
1043         } else {
1044                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1045                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1046         }
1047         if (err)
1048                 goto out;
1049         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1050         if (err)
1051                 goto out;
1052         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1053         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1054         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1055
1056         /*
1057          * Support for MCBP/Virtualization in combination with chained IBs is
1058          * formal released on feature version #46
1059          */
1060         if (adev->gfx.ce_feature_version >= 46 &&
1061             adev->gfx.pfp_feature_version >= 46) {
1062                 adev->virt.chained_ib_support = true;
1063                 DRM_INFO("Chained IB support enabled!\n");
1064         } else
1065                 adev->virt.chained_ib_support = false;
1066
1067         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1068         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1069         if (err)
1070                 goto out;
1071         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1072         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1073         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1074         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1075
1076         adev->gfx.rlc.save_and_restore_offset =
1077                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1078         adev->gfx.rlc.clear_state_descriptor_offset =
1079                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1080         adev->gfx.rlc.avail_scratch_ram_locations =
1081                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1082         adev->gfx.rlc.reg_restore_list_size =
1083                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1084         adev->gfx.rlc.reg_list_format_start =
1085                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1086         adev->gfx.rlc.reg_list_format_separate_start =
1087                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1088         adev->gfx.rlc.starting_offsets_start =
1089                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1090         adev->gfx.rlc.reg_list_format_size_bytes =
1091                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1092         adev->gfx.rlc.reg_list_size_bytes =
1093                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1094
1095         adev->gfx.rlc.register_list_format =
1096                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1097                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1098
1099         if (!adev->gfx.rlc.register_list_format) {
1100                 err = -ENOMEM;
1101                 goto out;
1102         }
1103
1104         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1105                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1106         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1107                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1108
1109         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1110
1111         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1112                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1113         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1114                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1115
1116         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1117                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1118                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1119                 if (err == -ENOENT) {
1120                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1121                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1122                 }
1123         } else {
1124                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1125                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1126         }
1127         if (err)
1128                 goto out;
1129         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1130         if (err)
1131                 goto out;
1132         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1133         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1134         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1135
1136         if ((adev->asic_type != CHIP_STONEY) &&
1137             (adev->asic_type != CHIP_TOPAZ)) {
1138                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1139                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1140                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1141                         if (err == -ENOENT) {
1142                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1143                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1144                         }
1145                 } else {
1146                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1147                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1148                 }
1149                 if (!err) {
1150                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1151                         if (err)
1152                                 goto out;
1153                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1154                                 adev->gfx.mec2_fw->data;
1155                         adev->gfx.mec2_fw_version =
1156                                 le32_to_cpu(cp_hdr->header.ucode_version);
1157                         adev->gfx.mec2_feature_version =
1158                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1159                 } else {
1160                         err = 0;
1161                         adev->gfx.mec2_fw = NULL;
1162                 }
1163         }
1164
1165         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1166         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1167         info->fw = adev->gfx.pfp_fw;
1168         header = (const struct common_firmware_header *)info->fw->data;
1169         adev->firmware.fw_size +=
1170                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1171
1172         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1173         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1174         info->fw = adev->gfx.me_fw;
1175         header = (const struct common_firmware_header *)info->fw->data;
1176         adev->firmware.fw_size +=
1177                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1178
1179         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1180         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1181         info->fw = adev->gfx.ce_fw;
1182         header = (const struct common_firmware_header *)info->fw->data;
1183         adev->firmware.fw_size +=
1184                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1185
1186         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1187         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1188         info->fw = adev->gfx.rlc_fw;
1189         header = (const struct common_firmware_header *)info->fw->data;
1190         adev->firmware.fw_size +=
1191                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1192
1193         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1194         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1195         info->fw = adev->gfx.mec_fw;
1196         header = (const struct common_firmware_header *)info->fw->data;
1197         adev->firmware.fw_size +=
1198                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1199
1200         /* we need account JT in */
1201         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1202         adev->firmware.fw_size +=
1203                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1204
1205         if (amdgpu_sriov_vf(adev)) {
1206                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1207                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1208                 info->fw = adev->gfx.mec_fw;
1209                 adev->firmware.fw_size +=
1210                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1211         }
1212
1213         if (adev->gfx.mec2_fw) {
1214                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1215                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1216                 info->fw = adev->gfx.mec2_fw;
1217                 header = (const struct common_firmware_header *)info->fw->data;
1218                 adev->firmware.fw_size +=
1219                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1220         }
1221
1222 out:
1223         if (err) {
1224                 dev_err(adev->dev,
1225                         "gfx8: Failed to load firmware \"%s\"\n",
1226                         fw_name);
1227                 release_firmware(adev->gfx.pfp_fw);
1228                 adev->gfx.pfp_fw = NULL;
1229                 release_firmware(adev->gfx.me_fw);
1230                 adev->gfx.me_fw = NULL;
1231                 release_firmware(adev->gfx.ce_fw);
1232                 adev->gfx.ce_fw = NULL;
1233                 release_firmware(adev->gfx.rlc_fw);
1234                 adev->gfx.rlc_fw = NULL;
1235                 release_firmware(adev->gfx.mec_fw);
1236                 adev->gfx.mec_fw = NULL;
1237                 release_firmware(adev->gfx.mec2_fw);
1238                 adev->gfx.mec2_fw = NULL;
1239         }
1240         return err;
1241 }
1242
/**
 * gfx_v8_0_get_csb_buffer - serialize the clear-state buffer (CSB)
 * @adev: amdgpu device pointer
 * @buffer: destination dword buffer (little-endian, volatile mapping)
 *
 * Emits the PM4 stream used to load the clear state: preamble begin,
 * context control, every SECT_CONTEXT extent from
 * adev->gfx.rlc.cs_data, the raster config pair, preamble end and a
 * trailing CLEAR_STATE packet.  Returns silently when cs_data or
 * buffer is NULL, and aborts the stream on any non-context section.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per extent of each context section */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only context registers belong in the CSB */
				return;
			}
		}
	}

	/* raster config pair comes from the probed RB configuration */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1289
1290 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1291 {
1292         if (adev->asic_type == CHIP_CARRIZO)
1293                 return 5;
1294         else
1295                 return 4;
1296 }
1297
1298 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1299 {
1300         const struct cs_section_def *cs_data;
1301         int r;
1302
1303         adev->gfx.rlc.cs_data = vi_cs_data;
1304
1305         cs_data = adev->gfx.rlc.cs_data;
1306
1307         if (cs_data) {
1308                 /* init clear state block */
1309                 r = amdgpu_gfx_rlc_init_csb(adev);
1310                 if (r)
1311                         return r;
1312         }
1313
1314         if ((adev->asic_type == CHIP_CARRIZO) ||
1315             (adev->asic_type == CHIP_STONEY)) {
1316                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1317                 r = amdgpu_gfx_rlc_init_cpt(adev);
1318                 if (r)
1319                         return r;
1320         }
1321
1322         /* init spm vmid with 0xf */
1323         if (adev->gfx.rlc.funcs->update_spm_vmid)
1324                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1325
1326         return 0;
1327 }
1328
/* Free the MEC HPD EOP buffer object allocated in gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1333
1334 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1335 {
1336         int r;
1337         u32 *hpd;
1338         size_t mec_hpd_size;
1339
1340         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1341
1342         /* take ownership of the relevant compute queues */
1343         amdgpu_gfx_compute_queue_acquire(adev);
1344
1345         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1346
1347         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1348                                       AMDGPU_GEM_DOMAIN_VRAM,
1349                                       &adev->gfx.mec.hpd_eop_obj,
1350                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1351                                       (void **)&hpd);
1352         if (r) {
1353                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1354                 return r;
1355         }
1356
1357         memset(hpd, 0, mec_hpd_size);
1358
1359         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1360         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1361
1362         return 0;
1363 }
1364
/*
 * Pre-assembled GCN compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize VGPRs before EDC is
 * enabled.  NOTE(review): this is raw machine code; per-instruction
 * meaning is not derivable from this file — do not edit by hand.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1401
/*
 * Pre-assembled GCN compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize SGPRs (dispatched
 * twice, once per sgprN_init_regs table).  NOTE(review): raw machine
 * code — do not edit by hand.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1426
/*
 * Register/value pairs programmed via PACKET3_SET_SH_REG before
 * dispatching vgpr_init_compute_shader in
 * gfx_v8_0_do_edc_gpr_workarounds().  Consumed two entries at a time
 * (offset, value).
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1447
/*
 * Register/value pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  Same layout as vgpr_init_regs;
 * differs from sgpr2_init_regs only in the SE0 thread-management mask
 * (0x0f here vs 0xf0 there).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1468
/*
 * Register/value pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  Identical to sgpr1_init_regs
 * except for the SE0 thread-management mask (0xf0 here vs 0x0f there).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1, same as siblings */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1489
/*
 * EDC SEC/DED counter registers read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1518
/*
 * Carrizo-only EDC GPR workaround: build a single IB that dispatches
 * the VGPR-init shader once and the SGPR-init shader twice (one per
 * sgprN_init_regs table), submits it on compute ring 0, waits for
 * completion, then enables the EDC mode bits and reads back the
 * SEC/DED counter registers to clear them.
 *
 * Returns 0 on success (or when the workaround does not apply), or a
 * negative error code from IB allocation/submission/fence wait.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->sched.ready)
		return 0;

	/* disable EDC while the init shaders run; tmp holds the old
	 * value, which is modified and written back after the fence */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords per SET_SH_REG pair, + 4 for
	 * COMPUTE_PGM_LO/HI, + 5 for DISPATCH_DIRECT, + 2 for the CS
	 * partial flush — times 4 for bytes */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size,
					AMDGPU_IB_POOL_DIRECT, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders into the tail of the IB */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 — same shader, different STATIC_THREAD_MGMT mask */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED_MODE/PROP_FED set on the saved value */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1682
/*
 * Populate adev->gfx.config from per-ASIC constants (shader engine/CU
 * topology, FIFO sizes, golden GB_ADDR_CONFIG) plus values read from
 * the memory controller, then fix up the ROW_SIZE field of
 * GB_ADDR_CONFIG from the derived memory row size.
 *
 * Returns 0 on success; on Polaris10/11/12 and VegaM, propagates an
 * error from amdgpu_atombios_get_gfx_info().
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* topology comes from the vbios on Polaris parts */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
	case CHIP_VEGAM:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFBANK);
	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFRANKS);

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* discrete parts: derive row size from the column count */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1890
1891 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1892                                         int mec, int pipe, int queue)
1893 {
1894         int r;
1895         unsigned irq_type;
1896         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1897
1898         ring = &adev->gfx.compute_ring[ring_id];
1899
1900         /* mec0 is me1 */
1901         ring->me = mec + 1;
1902         ring->pipe = pipe;
1903         ring->queue = queue;
1904
1905         ring->ring_obj = NULL;
1906         ring->use_doorbell = true;
1907         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1908         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1909                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1910         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1911
1912         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1913                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1914                 + ring->pipe;
1915
1916         /* type-2 packets are deprecated on MEC, use type-3 instead */
1917         r = amdgpu_ring_init(adev, ring, 1024,
1918                         &adev->gfx.eop_irq, irq_type);
1919         if (r)
1920                 return r;
1921
1922
1923         return 0;
1924 }
1925
1926 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1927
1928 static int gfx_v8_0_sw_init(void *handle)
1929 {
1930         int i, j, k, r, ring_id;
1931         struct amdgpu_ring *ring;
1932         struct amdgpu_kiq *kiq;
1933         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1934
1935         switch (adev->asic_type) {
1936         case CHIP_TONGA:
1937         case CHIP_CARRIZO:
1938         case CHIP_FIJI:
1939         case CHIP_POLARIS10:
1940         case CHIP_POLARIS11:
1941         case CHIP_POLARIS12:
1942         case CHIP_VEGAM:
1943                 adev->gfx.mec.num_mec = 2;
1944                 break;
1945         case CHIP_TOPAZ:
1946         case CHIP_STONEY:
1947         default:
1948                 adev->gfx.mec.num_mec = 1;
1949                 break;
1950         }
1951
1952         adev->gfx.mec.num_pipe_per_mec = 4;
1953         adev->gfx.mec.num_queue_per_pipe = 8;
1954
1955         /* EOP Event */
1956         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1957         if (r)
1958                 return r;
1959
1960         /* Privileged reg */
1961         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1962                               &adev->gfx.priv_reg_irq);
1963         if (r)
1964                 return r;
1965
1966         /* Privileged inst */
1967         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1968                               &adev->gfx.priv_inst_irq);
1969         if (r)
1970                 return r;
1971
1972         /* Add CP EDC/ECC irq  */
1973         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1974                               &adev->gfx.cp_ecc_error_irq);
1975         if (r)
1976                 return r;
1977
1978         /* SQ interrupts. */
1979         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1980                               &adev->gfx.sq_irq);
1981         if (r) {
1982                 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1983                 return r;
1984         }
1985
1986         INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1987
1988         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1989
1990         gfx_v8_0_scratch_init(adev);
1991
1992         r = gfx_v8_0_init_microcode(adev);
1993         if (r) {
1994                 DRM_ERROR("Failed to load gfx firmware!\n");
1995                 return r;
1996         }
1997
1998         r = adev->gfx.rlc.funcs->init(adev);
1999         if (r) {
2000                 DRM_ERROR("Failed to init rlc BOs!\n");
2001                 return r;
2002         }
2003
2004         r = gfx_v8_0_mec_init(adev);
2005         if (r) {
2006                 DRM_ERROR("Failed to init MEC BOs!\n");
2007                 return r;
2008         }
2009
2010         /* set up the gfx ring */
2011         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2012                 ring = &adev->gfx.gfx_ring[i];
2013                 ring->ring_obj = NULL;
2014                 sprintf(ring->name, "gfx");
2015                 /* no gfx doorbells on iceland */
2016                 if (adev->asic_type != CHIP_TOPAZ) {
2017                         ring->use_doorbell = true;
2018                         ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2019                 }
2020
2021                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2022                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2023                 if (r)
2024                         return r;
2025         }
2026
2027
2028         /* set up the compute queues - allocate horizontally across pipes */
2029         ring_id = 0;
2030         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2031                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2032                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2033                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2034                                         continue;
2035
2036                                 r = gfx_v8_0_compute_ring_init(adev,
2037                                                                 ring_id,
2038                                                                 i, k, j);
2039                                 if (r)
2040                                         return r;
2041
2042                                 ring_id++;
2043                         }
2044                 }
2045         }
2046
2047         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2048         if (r) {
2049                 DRM_ERROR("Failed to init KIQ BOs!\n");
2050                 return r;
2051         }
2052
2053         kiq = &adev->gfx.kiq;
2054         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2055         if (r)
2056                 return r;
2057
2058         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2059         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2060         if (r)
2061                 return r;
2062
2063         adev->gfx.ce_ram_size = 0x8000;
2064
2065         r = gfx_v8_0_gpu_early_init(adev);
2066         if (r)
2067                 return r;
2068
2069         return 0;
2070 }
2071
/*
 * gfx_v8_0_sw_fini - software-side teardown for the GFX v8 IP block.
 *
 * Releases everything allocated by the corresponding sw_init path: the gfx
 * and compute rings, the per-queue MQD buffer objects, the KIQ ring and its
 * bookkeeping, the MEC and RLC buffer objects, and the loaded microcode.
 * The teardown order mirrors the reverse of initialization; in particular
 * the MQD BOs are freed before the KIQ ring/context that references them.
 *
 * @handle: opaque amd_ip_funcs handle; actually a struct amdgpu_device *.
 *
 * Returns 0 (this path has no failure cases to report).
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	/* Tear down every gfx and compute ring created in sw_init. */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	/* Free MQD BOs first, then the KIQ ring and KIQ state that used them. */
	amdgpu_gfx_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	amdgpu_gfx_rlc_fini(adev);
	/* Release the RLC clear-state BO and its CPU/GPU mappings. */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/*
	 * The RLC cp_table BO only exists on Carrizo and Stoney (presumably
	 * allocated by the ASIC-specific RLC init path — matches this guard),
	 * so only free it there.
	 */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2101
2102 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2103 {
2104         uint32_t *modearray, *mod2array;
2105         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2106         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2107         u32 reg_offset;
2108
2109         modearray = adev->gfx.config.tile_mode_array;
2110         mod2array = adev->gfx.config.macrotile_mode_array;
2111
2112         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2113                 modearray[reg_offset] = 0;
2114
2115         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2116                 mod2array[reg_offset] = 0;
2117
2118         switch (adev->asic_type) {
2119         case CHIP_TOPAZ:
2120                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2121                                 PIPE_CONFIG(ADDR_SURF_P2) |
2122                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2123                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2124                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2125                                 PIPE_CONFIG(ADDR_SURF_P2) |
2126                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2127                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2128                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2129                                 PIPE_CONFIG(ADDR_SURF_P2) |
2130                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2131                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2132                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2133                                 PIPE_CONFIG(ADDR_SURF_P2) |
2134                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2135                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2136                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2137                                 PIPE_CONFIG(ADDR_SURF_P2) |
2138                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2139                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2140                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2141                                 PIPE_CONFIG(ADDR_SURF_P2) |
2142                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2143                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2144                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2145                                 PIPE_CONFIG(ADDR_SURF_P2) |
2146                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2147                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2148                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2149                                 PIPE_CONFIG(ADDR_SURF_P2));
2150                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2151                                 PIPE_CONFIG(ADDR_SURF_P2) |
2152                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2153                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2154                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2155                                  PIPE_CONFIG(ADDR_SURF_P2) |
2156                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2157                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2158                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2159                                  PIPE_CONFIG(ADDR_SURF_P2) |
2160                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2161                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2162                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2163                                  PIPE_CONFIG(ADDR_SURF_P2) |
2164                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2165                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2166                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2167                                  PIPE_CONFIG(ADDR_SURF_P2) |
2168                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2169                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2170                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2171                                  PIPE_CONFIG(ADDR_SURF_P2) |
2172                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2173                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2174                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2175                                  PIPE_CONFIG(ADDR_SURF_P2) |
2176                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2177                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2178                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2179                                  PIPE_CONFIG(ADDR_SURF_P2) |
2180                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2181                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2182                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2183                                  PIPE_CONFIG(ADDR_SURF_P2) |
2184                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2185                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2186                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2187                                  PIPE_CONFIG(ADDR_SURF_P2) |
2188                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2189                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2190                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2191                                  PIPE_CONFIG(ADDR_SURF_P2) |
2192                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2193                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2194                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2195                                  PIPE_CONFIG(ADDR_SURF_P2) |
2196                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2197                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2198                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2199                                  PIPE_CONFIG(ADDR_SURF_P2) |
2200                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2202                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2203                                  PIPE_CONFIG(ADDR_SURF_P2) |
2204                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2205                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2206                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2207                                  PIPE_CONFIG(ADDR_SURF_P2) |
2208                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2209                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2210                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2211                                  PIPE_CONFIG(ADDR_SURF_P2) |
2212                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2213                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2214                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2215                                  PIPE_CONFIG(ADDR_SURF_P2) |
2216                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2217                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2218                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2219                                  PIPE_CONFIG(ADDR_SURF_P2) |
2220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2222
2223                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2224                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2225                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2226                                 NUM_BANKS(ADDR_SURF_8_BANK));
2227                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2228                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2229                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2230                                 NUM_BANKS(ADDR_SURF_8_BANK));
2231                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2232                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2233                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2234                                 NUM_BANKS(ADDR_SURF_8_BANK));
2235                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2236                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2237                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2238                                 NUM_BANKS(ADDR_SURF_8_BANK));
2239                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2240                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2241                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2242                                 NUM_BANKS(ADDR_SURF_8_BANK));
2243                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2245                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2246                                 NUM_BANKS(ADDR_SURF_8_BANK));
2247                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2248                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2249                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2250                                 NUM_BANKS(ADDR_SURF_8_BANK));
2251                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2252                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2253                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2254                                 NUM_BANKS(ADDR_SURF_16_BANK));
2255                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2256                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2257                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2258                                 NUM_BANKS(ADDR_SURF_16_BANK));
2259                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2260                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2261                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2262                                  NUM_BANKS(ADDR_SURF_16_BANK));
2263                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2264                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2265                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2266                                  NUM_BANKS(ADDR_SURF_16_BANK));
2267                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2269                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2270                                  NUM_BANKS(ADDR_SURF_16_BANK));
2271                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2273                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2274                                  NUM_BANKS(ADDR_SURF_16_BANK));
2275                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2276                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2277                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2278                                  NUM_BANKS(ADDR_SURF_8_BANK));
2279
2280                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2281                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2282                             reg_offset != 23)
2283                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2284
2285                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2286                         if (reg_offset != 7)
2287                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2288
2289                 break;
2290         case CHIP_FIJI:
2291         case CHIP_VEGAM:
2292                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2293                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2294                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2295                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2296                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2297                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2298                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2299                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2300                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2301                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2302                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2303                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2304                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2305                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2306                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2307                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2308                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2309                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2310                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2311                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2312                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2313                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2314                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2315                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2316                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2317                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2318                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2319                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2320                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2321                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2322                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2323                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2324                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2325                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2326                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2327                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2329                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2330                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2333                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2334                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2335                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2337                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2338                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2339                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2340                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2341                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2342                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2343                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2344                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2345                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2346                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2347                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2348                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2349                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2350                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2351                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2353                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2354                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2355                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2356                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2357                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2358                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2359                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2360                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2361                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2362                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2363                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2365                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2366                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2367                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2369                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2370                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2371                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2373                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2374                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2375                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2377                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2378                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2379                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2381                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2382                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2383                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2384                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2385                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2386                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2387                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2389                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2390                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2391                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2393                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2394                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2395                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2397                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2398                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2399                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2401                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2402                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2405                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2407                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2409                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2410                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2412                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2413                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2414
2415                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2417                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2418                                 NUM_BANKS(ADDR_SURF_8_BANK));
2419                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2420                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2421                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2422                                 NUM_BANKS(ADDR_SURF_8_BANK));
2423                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2424                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2425                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2426                                 NUM_BANKS(ADDR_SURF_8_BANK));
2427                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2428                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2429                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2430                                 NUM_BANKS(ADDR_SURF_8_BANK));
2431                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2433                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2434                                 NUM_BANKS(ADDR_SURF_8_BANK));
2435                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2436                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2437                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2438                                 NUM_BANKS(ADDR_SURF_8_BANK));
2439                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2440                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2441                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2442                                 NUM_BANKS(ADDR_SURF_8_BANK));
2443                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2444                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2445                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2446                                 NUM_BANKS(ADDR_SURF_8_BANK));
2447                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2449                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2450                                 NUM_BANKS(ADDR_SURF_8_BANK));
2451                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2452                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2453                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2454                                  NUM_BANKS(ADDR_SURF_8_BANK));
2455                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2457                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2458                                  NUM_BANKS(ADDR_SURF_8_BANK));
2459                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2460                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2461                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2462                                  NUM_BANKS(ADDR_SURF_8_BANK));
2463                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2464                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2465                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2466                                  NUM_BANKS(ADDR_SURF_8_BANK));
2467                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2469                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2470                                  NUM_BANKS(ADDR_SURF_4_BANK));
2471
2472                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2473                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2474
2475                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2476                         if (reg_offset != 7)
2477                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2478
2479                 break;
	case CHIP_TONGA:
		/*
		 * GB_TILE_MODE0..30 values for Tonga.  Most entries use the
		 * 8-pipe ADDR_SURF_P8_32x32_16x16 pipe config; a few PRT
		 * entries (7, 12, 17, 23, 30) use ADDR_SURF_P4_16x16 instead.
		 */
		/* Entries 0-7: depth micro-tiling with increasing tile split. */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* Entry 8: linear-aligned (no micro-tiling fields). */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		/* Entries 9-12: display micro-tiling. */
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* Entries 13-17: thin micro-tiling. */
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* Entries 18-26: thick/xthick array modes (3D resources). */
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* Entries 27-30: rotated micro-tiling. */
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14 values for Tonga: bank width/height,
		 * macro tile aspect and bank count.  Index 7 is intentionally
		 * never initialized or written (the WREG32 loop below skips
		 * reg_offset 7).
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile-mode and macrotile-mode registers;
		 * macrotile index 7 is skipped. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/*
		 * GB_TILE_MODE0..30 values shared by Polaris11 and Polaris12.
		 * Every entry uses the 4-pipe ADDR_SURF_P4_16x16 pipe config.
		 */
		/* Entries 0-7: depth micro-tiling with increasing tile split. */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* Entry 8: linear-aligned (no micro-tiling fields). */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		/* Entries 9-12: display micro-tiling. */
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* Entries 13-17: thin micro-tiling. */
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* Entries 18-26: thick/xthick array modes (3D resources). */
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* Entries 27-30: rotated micro-tiling. */
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14 values for Polaris11/12: bank
		 * width/height, macro tile aspect and bank count.  Index 7
		 * is intentionally never initialized or written (the WREG32
		 * loop below skips reg_offset 7).
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile-mode and macrotile-mode registers;
		 * macrotile index 7 is skipped. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2872         case CHIP_POLARIS10:
2873                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2874                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2875                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2876                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2877                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2879                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2880                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2881                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2883                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2884                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2885                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2887                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2888                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2889                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2891                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2892                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2893                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2894                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2895                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2896                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2897                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2898                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2899                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2900                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2901                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2902                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2904                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2905                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2906                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2907                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2908                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2910                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2911                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2914                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2915                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2916                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2917                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2918                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2919                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2920                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2921                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2922                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2923                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2924                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2925                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2926                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2928                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2929                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2930                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2931                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2932                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2933                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2935                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2936                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2937                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2939                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2940                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2941                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2942                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2943                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2944                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2945                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2946                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2947                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2948                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2949                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2950                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2951                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2952                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2953                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2954                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2955                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2956                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2958                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2959                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2960                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2962                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2963                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2964                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2965                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2966                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2967                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2968                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2969                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2970                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2971                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2972                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2973                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2974                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2975                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2976                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2977                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2978                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2979                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2980                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2982                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2983                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2984                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2986                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2987                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2988                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2989                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2990                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2991                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2992                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2993                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2994                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2995
2996                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2997                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2998                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999                                 NUM_BANKS(ADDR_SURF_16_BANK));
3000
3001                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3002                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3003                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3004                                 NUM_BANKS(ADDR_SURF_16_BANK));
3005
3006                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3007                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3008                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3009                                 NUM_BANKS(ADDR_SURF_16_BANK));
3010
3011                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3012                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3013                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3014                                 NUM_BANKS(ADDR_SURF_16_BANK));
3015
3016                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3017                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3018                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3019                                 NUM_BANKS(ADDR_SURF_16_BANK));
3020
3021                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3023                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3024                                 NUM_BANKS(ADDR_SURF_16_BANK));
3025
3026                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3027                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3028                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3029                                 NUM_BANKS(ADDR_SURF_16_BANK));
3030
3031                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3033                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3034                                 NUM_BANKS(ADDR_SURF_16_BANK));
3035
3036                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3038                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3039                                 NUM_BANKS(ADDR_SURF_16_BANK));
3040
3041                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3043                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3044                                 NUM_BANKS(ADDR_SURF_16_BANK));
3045
3046                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3048                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3049                                 NUM_BANKS(ADDR_SURF_16_BANK));
3050
3051                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3052                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3053                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3054                                 NUM_BANKS(ADDR_SURF_8_BANK));
3055
3056                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3058                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3059                                 NUM_BANKS(ADDR_SURF_4_BANK));
3060
3061                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3062                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3063                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3064                                 NUM_BANKS(ADDR_SURF_4_BANK));
3065
                /*
                 * Commit the tables to hardware: one 32-bit config word per
                 * GB_TILE_MODEn register.  mod2array[7] has no entry above
                 * for this ASIC, so GB_MACROTILE_MODE7 is not written.
                 */
3066                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3067                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3068
3069                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3070                         if (reg_offset != 7)
3071                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3072
3073                 break;
3074         case CHIP_STONEY:
3075                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3076                                 PIPE_CONFIG(ADDR_SURF_P2) |
3077                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3078                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3079                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3080                                 PIPE_CONFIG(ADDR_SURF_P2) |
3081                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3082                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3083                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3084                                 PIPE_CONFIG(ADDR_SURF_P2) |
3085                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3086                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3087                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3088                                 PIPE_CONFIG(ADDR_SURF_P2) |
3089                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3090                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3091                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3092                                 PIPE_CONFIG(ADDR_SURF_P2) |
3093                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3094                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3095                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3096                                 PIPE_CONFIG(ADDR_SURF_P2) |
3097                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3098                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3099                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3100                                 PIPE_CONFIG(ADDR_SURF_P2) |
3101                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3102                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3103                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3104                                 PIPE_CONFIG(ADDR_SURF_P2));
3105                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3106                                 PIPE_CONFIG(ADDR_SURF_P2) |
3107                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3108                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3109                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3110                                  PIPE_CONFIG(ADDR_SURF_P2) |
3111                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3112                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3113                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3114                                  PIPE_CONFIG(ADDR_SURF_P2) |
3115                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3116                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3117                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3118                                  PIPE_CONFIG(ADDR_SURF_P2) |
3119                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3120                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3121                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122                                  PIPE_CONFIG(ADDR_SURF_P2) |
3123                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3124                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3125                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3126                                  PIPE_CONFIG(ADDR_SURF_P2) |
3127                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3128                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3129                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3130                                  PIPE_CONFIG(ADDR_SURF_P2) |
3131                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3132                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3133                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3134                                  PIPE_CONFIG(ADDR_SURF_P2) |
3135                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3136                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3137                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3138                                  PIPE_CONFIG(ADDR_SURF_P2) |
3139                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3140                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3141                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3142                                  PIPE_CONFIG(ADDR_SURF_P2) |
3143                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3144                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3145                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3146                                  PIPE_CONFIG(ADDR_SURF_P2) |
3147                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3148                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3149                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3150                                  PIPE_CONFIG(ADDR_SURF_P2) |
3151                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3152                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3153                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3154                                  PIPE_CONFIG(ADDR_SURF_P2) |
3155                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3156                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3157                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3158                                  PIPE_CONFIG(ADDR_SURF_P2) |
3159                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3160                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3161                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3162                                  PIPE_CONFIG(ADDR_SURF_P2) |
3163                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3164                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3165                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3166                                  PIPE_CONFIG(ADDR_SURF_P2) |
3167                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3168                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3169                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3170                                  PIPE_CONFIG(ADDR_SURF_P2) |
3171                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3172                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3173                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3174                                  PIPE_CONFIG(ADDR_SURF_P2) |
3175                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3176                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3177
3178                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3179                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3180                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3181                                 NUM_BANKS(ADDR_SURF_8_BANK));
3182                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3183                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3184                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3185                                 NUM_BANKS(ADDR_SURF_8_BANK));
3186                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3187                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3188                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3189                                 NUM_BANKS(ADDR_SURF_8_BANK));
3190                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3191                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3192                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3193                                 NUM_BANKS(ADDR_SURF_8_BANK));
3194                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3195                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3196                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3197                                 NUM_BANKS(ADDR_SURF_8_BANK));
3198                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3199                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3200                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3201                                 NUM_BANKS(ADDR_SURF_8_BANK));
3202                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3203                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3204                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3205                                 NUM_BANKS(ADDR_SURF_8_BANK));
3206                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3207                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3208                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3209                                 NUM_BANKS(ADDR_SURF_16_BANK));
3210                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3211                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3212                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3213                                 NUM_BANKS(ADDR_SURF_16_BANK));
3214                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3215                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3216                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3217                                  NUM_BANKS(ADDR_SURF_16_BANK));
3218                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3219                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3220                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3221                                  NUM_BANKS(ADDR_SURF_16_BANK));
3222                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3223                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3224                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3225                                  NUM_BANKS(ADDR_SURF_16_BANK));
3226                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3227                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3228                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3229                                  NUM_BANKS(ADDR_SURF_16_BANK));
3230                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3231                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3232                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3233                                  NUM_BANKS(ADDR_SURF_8_BANK));
3234
                /*
                 * Stoney: tile modes 7, 12, 17 and 23 and macrotile mode 7
                 * are not defined in the tables above, so those registers
                 * are skipped and not written.
                 */
3235                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3236                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3237                             reg_offset != 23)
3238                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3239
3240                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3241                         if (reg_offset != 7)
3242                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3243
3244                 break;
3245         default:
                /*
                 * Unrecognized VI variants log a warning and are then
                 * programmed with the CHIP_CARRIZO tables below via the
                 * intentional case fallthrough.
                 */
3246                 dev_warn(adev->dev,
3247                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3248                          adev->asic_type);
3249                 /* fall through */
3250
3251         case CHIP_CARRIZO:
3252                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3253                                 PIPE_CONFIG(ADDR_SURF_P2) |
3254                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3255                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3256                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3257                                 PIPE_CONFIG(ADDR_SURF_P2) |
3258                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3259                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3260                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3261                                 PIPE_CONFIG(ADDR_SURF_P2) |
3262                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3263                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3264                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3265                                 PIPE_CONFIG(ADDR_SURF_P2) |
3266                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3267                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3268                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3269                                 PIPE_CONFIG(ADDR_SURF_P2) |
3270                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3271                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3272                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3273                                 PIPE_CONFIG(ADDR_SURF_P2) |
3274                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3275                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3276                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3277                                 PIPE_CONFIG(ADDR_SURF_P2) |
3278                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3279                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3280                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3281                                 PIPE_CONFIG(ADDR_SURF_P2));
3282                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3283                                 PIPE_CONFIG(ADDR_SURF_P2) |
3284                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3285                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3286                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3287                                  PIPE_CONFIG(ADDR_SURF_P2) |
3288                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3289                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3290                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3291                                  PIPE_CONFIG(ADDR_SURF_P2) |
3292                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3293                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3294                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3295                                  PIPE_CONFIG(ADDR_SURF_P2) |
3296                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3297                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3298                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3299                                  PIPE_CONFIG(ADDR_SURF_P2) |
3300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3302                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3303                                  PIPE_CONFIG(ADDR_SURF_P2) |
3304                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3305                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3306                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3307                                  PIPE_CONFIG(ADDR_SURF_P2) |
3308                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3309                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3310                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3311                                  PIPE_CONFIG(ADDR_SURF_P2) |
3312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3314                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3315                                  PIPE_CONFIG(ADDR_SURF_P2) |
3316                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3317                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3318                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3319                                  PIPE_CONFIG(ADDR_SURF_P2) |
3320                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3321                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3322                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3323                                  PIPE_CONFIG(ADDR_SURF_P2) |
3324                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3325                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3326                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3327                                  PIPE_CONFIG(ADDR_SURF_P2) |
3328                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3329                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3330                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3331                                  PIPE_CONFIG(ADDR_SURF_P2) |
3332                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3333                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3334                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3335                                  PIPE_CONFIG(ADDR_SURF_P2) |
3336                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3337                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3338                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3339                                  PIPE_CONFIG(ADDR_SURF_P2) |
3340                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3341                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3342                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3343                                  PIPE_CONFIG(ADDR_SURF_P2) |
3344                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3345                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3346                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3347                                  PIPE_CONFIG(ADDR_SURF_P2) |
3348                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3349                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3350                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3351                                  PIPE_CONFIG(ADDR_SURF_P2) |
3352                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3353                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3354
3355                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3356                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3357                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3358                                 NUM_BANKS(ADDR_SURF_8_BANK));
3359                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3360                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3361                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3362                                 NUM_BANKS(ADDR_SURF_8_BANK));
3363                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3364                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3365                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3366                                 NUM_BANKS(ADDR_SURF_8_BANK));
3367                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3368                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3369                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3370                                 NUM_BANKS(ADDR_SURF_8_BANK));
3371                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3372                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3373                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3374                                 NUM_BANKS(ADDR_SURF_8_BANK));
3375                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3376                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3377                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3378                                 NUM_BANKS(ADDR_SURF_8_BANK));
3379                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3380                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3381                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3382                                 NUM_BANKS(ADDR_SURF_8_BANK));
3383                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3384                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3385                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3386                                 NUM_BANKS(ADDR_SURF_16_BANK));
3387                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3388                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3389                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3390                                 NUM_BANKS(ADDR_SURF_16_BANK));
3391                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3392                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3393                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3394                                  NUM_BANKS(ADDR_SURF_16_BANK));
3395                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3396                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3397                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3398                                  NUM_BANKS(ADDR_SURF_16_BANK));
3399                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3400                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3401                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3402                                  NUM_BANKS(ADDR_SURF_16_BANK));
3403                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3404                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3405                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3406                                  NUM_BANKS(ADDR_SURF_16_BANK));
3407                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3408                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3409                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3410                                  NUM_BANKS(ADDR_SURF_8_BANK));
3411
3412                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3413                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3414                             reg_offset != 23)
3415                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3416
3417                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3418                         if (reg_offset != 7)
3419                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3420
3421                 break;
3422         }
3423 }
3424
3425 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3426                                   u32 se_num, u32 sh_num, u32 instance)
3427 {
3428         u32 data;
3429
3430         if (instance == 0xffffffff)
3431                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3432         else
3433                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3434
3435         if (se_num == 0xffffffff)
3436                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3437         else
3438                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3439
3440         if (sh_num == 0xffffffff)
3441                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3442         else
3443                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3444
3445         WREG32(mmGRBM_GFX_INDEX, data);
3446 }
3447
/*
 * Select the target ME/pipe/queue/VM for subsequent register accesses.
 * Thin adapter so the common gfx code can drive the VI SRBM select path.
 */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q, u32 vm)
{
	vi_srbm_select(adev, me, pipe, q, vm);
}
3453
3454 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3455 {
3456         u32 data, mask;
3457
3458         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3459                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3460
3461         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3462
3463         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3464                                          adev->gfx.config.max_sh_per_se);
3465
3466         return (~data) & mask;
3467 }
3468
/*
 * Compute the per-ASIC default PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1
 * values. The RB/PKR/SE mapping fields appropriate for each chip's
 * render-backend topology are ORed into *rconf / *rconf1, so the caller's
 * existing bits are preserved.
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_VEGAM:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		/* only RB mapping within packer 0 needs configuring */
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* SE mapping but no SE-pair configuration */
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		/* hardware defaults are sufficient */
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3509
/*
 * Distribute the raster configuration across shader engines when some
 * render backends were harvested (disabled). For each SE, the default
 * @raster_config is patched so that the SE/PKR/RB map fields steer work
 * away from packers/RBs whose bits are missing from @rb_mask, and the
 * result is written with GRBM_GFX_INDEX targeting that SE only. On exit
 * GRBM_GFX_INDEX is restored to broadcast mode.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Per-SE slice of the global RB enable mask. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If one SE pair has no active RBs at all, retarget SE_PAIR_MAP at
	 * the surviving pair.
	 */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* One SE of a pair fully harvested: point SE_MAP at the
		 * working one.
		 */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		/* A packer with no live RBs: steer PKR_MAP to the other one. */
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			/* Remap packer 0's RB pair around a harvested RB. */
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				/* Same remapping for packer 1's RB pair. */
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3618
/*
 * Discover which render backends survived harvesting and program the
 * raster configuration accordingly. Also caches the per-SE/SH RB and
 * raster-config register values for userspace queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Collect every SE/SH's active-RB bitmap into one global mask. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* Fully populated (or no RBs at all): broadcast the defaults;
	 * otherwise patch the config per-SE around the harvested RBs.
	 */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3675
/**
 * gfx_v8_0_init_compute_vmid - init the compute VMID SH_MEM registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
/* Base encoding packed into both halves of SH_MEM_BASES for compute VMIDs */
#define DEFAULT_SH_MEM_BASES	(0x6000)
/* Compute VMIDs occupy the range [FIRST, LAST) */
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* Program the same aperture setup into every compute VMID. */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
	}
}
3729
3730 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3731 {
3732         int vmid;
3733
3734         /*
3735          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3736          * access. Compute VMIDs should be enabled by FW for target VMIDs,
3737          * the driver can enable them for graphics. VMID0 should maintain
3738          * access so that HWS firmware can save/restore entries.
3739          */
3740         for (vmid = 1; vmid < 16; vmid++) {
3741                 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3742                 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3743                 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3744                 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3745         }
3746 }
3747
3748 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3749 {
3750         switch (adev->asic_type) {
3751         default:
3752                 adev->gfx.config.double_offchip_lds_buf = 1;
3753                 break;
3754         case CHIP_CARRIZO:
3755         case CHIP_STONEY:
3756                 adev->gfx.config.double_offchip_lds_buf = 0;
3757                 break;
3758         }
3759 }
3760
/*
 * One-time programming of global gfx constants: address configuration,
 * tiling tables, RB/CU discovery, per-VMID SH_MEM apertures and the
 * broadcast SC FIFO / SPI arbitration settings.
 */
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: uncached default/APE1 MTYPE, zero base */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* other VMIDs: MTYPE_NC default, APE1 stays UC;
			 * base taken from shared aperture bits 63:48
			 */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);
	gfx_v8_0_init_gds_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* set all SPI pipe-order arbitration priorities (TS0..TS3) to 2 */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3843
/*
 * Wait for the RLC serdes units to go idle: first poll the CU master
 * busy register on every SE/SH (bailing out with a message after
 * adev->usec_timeout microseconds for any unit), then poll the non-CU
 * masters (SE/GC/TC0/TC1) once globally.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* restore broadcast before giving up */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3881
3882 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3883                                                bool enable)
3884 {
3885         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3886
3887         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3888         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3889         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3890         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3891
3892         WREG32(mmCP_INT_CNTL_RING0, tmp);
3893 }
3894
3895 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3896 {
3897         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3898         /* csib */
3899         WREG32(mmRLC_CSIB_ADDR_HI,
3900                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3901         WREG32(mmRLC_CSIB_ADDR_LO,
3902                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3903         WREG32(mmRLC_CSIB_LENGTH,
3904                         adev->gfx.rlc.clear_state_size);
3905 }
3906
/*
 * Walk the RLC indirect register-list format data and compress its
 * register indices in place.
 *
 * The data at @register_list_format is scanned from @ind_offset for
 * @list_size dwords; entries are separated by 0xFFFFFFFF markers. The
 * dword two positions past the current one is treated as an indirect
 * register index (assumes each entry carries at least three dwords —
 * TODO confirm against the RLC firmware list format). Each distinct
 * index value is collected once into @unique_indices (count via
 * @indices_count) and the in-place value is replaced by its position in
 * that table. The offset where each entry starts is recorded in
 * @ind_start_offsets (count via @offset_count). BUG_ON fires if either
 * output array would overflow; note the checks run after the increment,
 * so the last slot of each array is effectively unusable.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			/* record where this entry begins */
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			/* entry terminator: the next dword starts a new one */
			new_entry = true;
			continue;
		}

		/* skip to the index dword of this entry */
		ind_offset += 2;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* first occurrence: append to the unique table */
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw index with its compressed table position */
		register_list_format[ind_offset] = indices;
	}
}
3956
/*
 * Program the RLC save/restore machine from the firmware-provided lists:
 * upload the direct register-restore list into SRM ARAM, the (index-
 * compressed) indirect format list and the per-entry start offsets into
 * GPM scratch, and register each unique indirect index with the SRM
 * index control registers.
 *
 * Returns 0 on success, -ENOMEM if the working copy of the format list
 * cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: parsing rewrites the index dwords in place */
	unsigned int *register_list_format =
		kmemdup(adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* store the restore-list length in (dword) pairs */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			/* low 18 bits -> ADDR register, bits 20+ -> DATA */
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4019
/* Turn on the RLC save/restore machine (assumes the lists are loaded). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4024
/* Program the RLC power-gating timing parameters: poll count, the four
 * RLC_PG_DELAY sub-delays, the SERDES command delay and the GFX-idle
 * threshold for GRBM register save.  The delay constants (0x60, 0x10, 0x3,
 * 0x55f0) are hardware tuning values — presumably from the VI register
 * programming guide; do not change without HW documentation.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	/* Build all four delay fields in one value, then write once. */
	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4041
/* Toggle SMU clock slow-down during power-up (Carrizo-family PG control). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4047
/* Toggle SMU clock slow-down during power-down (Carrizo-family PG control). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4053
/* Enable/disable CP power gating.  Note the inverted sense: the hardware
 * field is CP_PG_DISABLE, so enable==true clears the bit.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4058
4059 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4060 {
4061         if ((adev->asic_type == CHIP_CARRIZO) ||
4062             (adev->asic_type == CHIP_STONEY)) {
4063                 gfx_v8_0_init_csb(adev);
4064                 gfx_v8_0_init_save_restore_list(adev);
4065                 gfx_v8_0_enable_save_restore_machine(adev);
4066                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4067                 gfx_v8_0_init_power_gating(adev);
4068                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4069         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4070                    (adev->asic_type == CHIP_POLARIS12) ||
4071                    (adev->asic_type == CHIP_VEGAM)) {
4072                 gfx_v8_0_init_csb(adev);
4073                 gfx_v8_0_init_save_restore_list(adev);
4074                 gfx_v8_0_enable_save_restore_machine(adev);
4075                 gfx_v8_0_init_power_gating(adev);
4076         }
4077
4078 }
4079
/* Halt the RLC: clear the F32 enable bit, mask GUI-idle interrupts, then
 * wait for the RLC serdes to go quiescent.  Order matters — the serdes wait
 * comes last so the microcontroller has actually stopped.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4087
/* Pulse the GRBM soft-reset bit for the RLC.  The 50us delays on each edge
 * give the reset time to propagate before/after release.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4096
/* Start the RLC microcontroller.  GUI-idle interrupts are enabled here only
 * for dGPUs; on APUs (e.g. Carrizo) they are enabled later, after the CP has
 * been initialized.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4107
/* Resume the RLC: stop, reset, re-program the PG state and restart, via the
 * rlc.funcs vtable.  Under SR-IOV the host owns the RLC, so the guest only
 * (re)initializes the clear-state buffer.
 *
 * Returns 0 (always succeeds).
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev)) {
		gfx_v8_0_init_csb(adev);
		return 0;
	}

	adev->gfx.rlc.funcs->stop(adev);
	adev->gfx.rlc.funcs->reset(adev);
	gfx_v8_0_init_pg(adev);
	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
4122
/* Halt or un-halt the three gfx CP micro-engines (ME, PFP, CE) via
 * CP_ME_CNTL.  When halting, also mark every gfx ring's scheduler not-ready
 * so no further jobs are pushed while the CP is down.
 */
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].sched.ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	/* let the halt/un-halt settle before callers touch the CP */
	udelay(50);
}
4142
4143 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4144 {
4145         u32 count = 0;
4146         const struct cs_section_def *sect = NULL;
4147         const struct cs_extent_def *ext = NULL;
4148
4149         /* begin clear state */
4150         count += 2;
4151         /* context control state */
4152         count += 3;
4153
4154         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4155                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4156                         if (sect->id == SECT_CONTEXT)
4157                                 count += 2 + ext->reg_count;
4158                         else
4159                                 return 0;
4160                 }
4161         }
4162         /* pa_sc_raster_config/pa_sc_raster_config1 */
4163         count += 4;
4164         /* end clear state */
4165         count += 2;
4166         /* clear state */
4167         count += 2;
4168
4169         return count;
4170 }
4171
/* Initialize the gfx CP and emit the clear-state PM4 stream on gfx ring 0:
 * preamble begin, context control, the vi_cs_data context registers, the
 * raster config, preamble end, CLEAR_STATE, and the CE partition bases.
 * The ring allocation size is gfx_v8_0_get_csb_size() plus 4 dwords for the
 * trailing SET_BASE packet.
 *
 * Returns 0 on success or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 covers the SET_BASE packet emitted after the clear state */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit one SET_CONTEXT_REG packet per extent of vi_cs_data */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/* Configure the CP gfx ring doorbell: offset/enable in
 * CP_RB_DOORBELL_CONTROL when the ring uses a doorbell, disabled otherwise.
 * On dGPUs additionally program the valid doorbell aperture range; APUs and
 * Topaz (no gfx doorbells) skip the respective parts.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* doorbell range registers only apply to dGPUs */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					adev->doorbell_index.gfx_ring0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4268
/* Bring up the gfx ring buffer (RB0): size/block-size, read/write pointer
 * reset, write-back addresses for rptr/wptr polling, ring base address,
 * doorbell setup, then start the ring via gfx_v8_0_cp_gfx_start().
 * The programming order (RPTR_WR_ENA set while pointers are reset, then
 * restored after an mdelay) follows the HW bring-up sequence — do not
 * reorder.
 *
 * Returns 0 (gfx_v8_0_cp_gfx_start()'s result is not propagated here).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* let the pointer reset settle before re-arming normal operation */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is programmed in 256-byte units */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->sched.ready = true;

	return 0;
}
4322
/* Halt or un-halt both MEC micro-engines via CP_MEC_CNTL.  When halting,
 * also mark every compute ring and the KIQ ring not-ready so the schedulers
 * stop submitting.
 */
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].sched.ready = false;
		adev->gfx.kiq.ring.sched.ready = false;
	}
	/* give the halt/un-halt time to take effect */
	udelay(50);
}
4337
4338 /* KIQ functions */
/* Tell the RLC which me/pipe/queue is the KIQ by packing them into the low
 * byte of RLC_CP_SCHEDULERS.  The register is written twice: first with the
 * queue identity alone, then with bit 7 (0x80) added — presumably a
 * "valid/activate" strobe for the scheduler; confirm against the RLC docs
 * before changing.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4352
/* Use the KIQ to map all compute queues: emit one SET_RESOURCES packet with
 * the queue bitmap, then one MAP_QUEUES packet per compute ring carrying its
 * MQD address, wptr write-back address and doorbell.  Ring space is
 * 8 dwords per MAP_QUEUES plus 8 for SET_RESOURCES.
 *
 * Returns 0 on success or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	/* build a 64-bit bitmap of the queues owned by the KGD */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}

	amdgpu_ring_commit(kiq_ring);

	return 0;
}
4413
/* Deactivate the currently selected HQD (callers select the queue via SRBM
 * first).  If the queue is active, issue the dequeue request @req and poll
 * CP_HQD_ACTIVE for up to adev->usec_timeout microseconds.  Afterwards the
 * request register and the PQ read/write pointers are cleared regardless.
 *
 * Returns 0 on success, -ETIMEDOUT if the queue never went inactive.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4434
4435 static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4436 {
4437         struct amdgpu_device *adev = ring->adev;
4438
4439         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4440                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
4441                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4442                         ring->has_high_prio = true;
4443                         mqd->cp_hqd_queue_priority =
4444                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4445                 } else {
4446                         ring->has_high_prio = false;
4447                 }
4448         }
4449 }
4450
/* Populate @ring's memory queue descriptor (MQD) from scratch: header and
 * thread-management masks, EOP buffer address/size, MQD/HQD base addresses,
 * PQ control, rptr/wptr write-back addresses, doorbell setup and the
 * CP_HQD_* defaults that gfx_v8_0_mqd_commit() later programs into the HW.
 * Callers hold srbm_mutex and have vi_srbm_select()'ed this ring's
 * me/pipe/queue, since the RREG32(mmCP_HQD_*) reads here are per-queue.
 *
 * Returns 0 (always succeeds).
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* enable all SEs/CUs for static thread management */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* point the CP at the dynamic CU mask stored in the MQD allocation */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	/* EOP base is in 256-byte units */
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot the current per-queue register values */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* set static priority for a queue/ring */
	gfx_v8_0_mqd_set_priority(ring, mqd);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);

	/* map_queues packet doesn't need activate the queue,
	 * so only kiq need set this field.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd->cp_hqd_active = 1;

	return 0;
}
4597
/* Write an MQD's fields into the hardware HQD registers of the currently
 * SRBM-selected queue.  The register file is contiguous from
 * mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR, so the MQD fields are indexed by
 * (reg - mmCP_MQD_BASE_ADDR).  Programming is split into ranges so that
 * CP_HQD_ACTIVE is written last (activating the queue only once everything
 * else is in place), with a Tonga-specific carve-out for the EOP pointers.
 *
 * Returns 0 (always succeeds).
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4634
/* Initialize (or restore after GPU reset) the KIQ's MQD and commit it to
 * hardware.  First boot builds a fresh MQD via gfx_v8_0_mqd_init() and backs
 * it up; on GPU reset the backup is restored instead, and the ring buffer is
 * cleared.  All HW programming happens with the KIQ's me/pipe/queue selected
 * under srbm_mutex.
 *
 * Returns 0 (always succeeds).
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* KIQ uses the backup slot after the compute rings */
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a pristine copy for restore after GPU reset */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4673
/* Initialize a compute (KCQ) ring's MQD.  Unlike the KIQ, the MQD is not
 * committed here — mapping is done later via the KIQ in
 * gfx_v8_0_kiq_kcq_enable().  Three cases: first init builds a fresh MQD and
 * backs it up; GPU reset restores the backup and resets the ring buffer;
 * plain suspend/resume just clears the ring.
 *
 * Returns 0 (always succeeds).
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* backup slot index = position of this ring in compute_ring[] */
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4704
/* Program the MEC doorbell aperture (KIQ index up to mec_ring7) on ASICs
 * newer than Tonga, then enable compute doorbells via CP_PQ_STATUS.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4714
4715 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4716 {
4717         struct amdgpu_ring *ring;
4718         int r;
4719
4720         ring = &adev->gfx.kiq.ring;
4721
4722         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4723         if (unlikely(r != 0))
4724                 return r;
4725
4726         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4727         if (unlikely(r != 0))
4728                 return r;
4729
4730         gfx_v8_0_kiq_init_queue(ring);
4731         amdgpu_bo_kunmap(ring->mqd_obj);
4732         ring->mqd_ptr = NULL;
4733         amdgpu_bo_unreserve(ring->mqd_obj);
4734         ring->sched.ready = true;
4735         return 0;
4736 }
4737
4738 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4739 {
4740         struct amdgpu_ring *ring = NULL;
4741         int r = 0, i;
4742
4743         gfx_v8_0_cp_compute_enable(adev, true);
4744
4745         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4746                 ring = &adev->gfx.compute_ring[i];
4747
4748                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4749                 if (unlikely(r != 0))
4750                         goto done;
4751                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4752                 if (!r) {
4753                         r = gfx_v8_0_kcq_init_queue(ring);
4754                         amdgpu_bo_kunmap(ring->mqd_obj);
4755                         ring->mqd_ptr = NULL;
4756                 }
4757                 amdgpu_bo_unreserve(ring->mqd_obj);
4758                 if (r)
4759                         goto done;
4760         }
4761
4762         gfx_v8_0_set_mec_doorbell_range(adev);
4763
4764         r = gfx_v8_0_kiq_kcq_enable(adev);
4765         if (r)
4766                 goto done;
4767
4768 done:
4769         return r;
4770 }
4771
4772 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4773 {
4774         int r, i;
4775         struct amdgpu_ring *ring;
4776
4777         /* collect all the ring_tests here, gfx, kiq, compute */
4778         ring = &adev->gfx.gfx_ring[0];
4779         r = amdgpu_ring_test_helper(ring);
4780         if (r)
4781                 return r;
4782
4783         ring = &adev->gfx.kiq.ring;
4784         r = amdgpu_ring_test_helper(ring);
4785         if (r)
4786                 return r;
4787
4788         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4789                 ring = &adev->gfx.compute_ring[i];
4790                 amdgpu_ring_test_helper(ring);
4791         }
4792
4793         return 0;
4794 }
4795
4796 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4797 {
4798         int r;
4799
4800         if (!(adev->flags & AMD_IS_APU))
4801                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4802
4803         r = gfx_v8_0_kiq_resume(adev);
4804         if (r)
4805                 return r;
4806
4807         r = gfx_v8_0_cp_gfx_resume(adev);
4808         if (r)
4809                 return r;
4810
4811         r = gfx_v8_0_kcq_resume(adev);
4812         if (r)
4813                 return r;
4814
4815         r = gfx_v8_0_cp_test_all_rings(adev);
4816         if (r)
4817                 return r;
4818
4819         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4820
4821         return 0;
4822 }
4823
/* Enable or halt both the gfx and compute command processors together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4829
4830 static int gfx_v8_0_hw_init(void *handle)
4831 {
4832         int r;
4833         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4834
4835         gfx_v8_0_init_golden_registers(adev);
4836         gfx_v8_0_constants_init(adev);
4837
4838         r = adev->gfx.rlc.funcs->resume(adev);
4839         if (r)
4840                 return r;
4841
4842         r = gfx_v8_0_cp_resume(adev);
4843
4844         return r;
4845 }
4846
4847 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4848 {
4849         int r, i;
4850         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4851
4852         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4853         if (r)
4854                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4855
4856         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4857                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4858
4859                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4860                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4861                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4862                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4863                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4864                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4865                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4866                 amdgpu_ring_write(kiq_ring, 0);
4867                 amdgpu_ring_write(kiq_ring, 0);
4868                 amdgpu_ring_write(kiq_ring, 0);
4869         }
4870         r = amdgpu_ring_test_helper(kiq_ring);
4871         if (r)
4872                 DRM_ERROR("KCQ disable failed\n");
4873
4874         return r;
4875 }
4876
4877 static bool gfx_v8_0_is_idle(void *handle)
4878 {
4879         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4880
4881         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4882                 || RREG32(mmGRBM_STATUS2) != 0x8)
4883                 return false;
4884         else
4885                 return true;
4886 }
4887
4888 static bool gfx_v8_0_rlc_is_idle(void *handle)
4889 {
4890         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4891
4892         if (RREG32(mmGRBM_STATUS2) != 0x8)
4893                 return false;
4894         else
4895                 return true;
4896 }
4897
4898 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4899 {
4900         unsigned int i;
4901         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4902
4903         for (i = 0; i < adev->usec_timeout; i++) {
4904                 if (gfx_v8_0_rlc_is_idle(handle))
4905                         return 0;
4906
4907                 udelay(1);
4908         }
4909         return -ETIMEDOUT;
4910 }
4911
4912 static int gfx_v8_0_wait_for_idle(void *handle)
4913 {
4914         unsigned int i;
4915         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4916
4917         for (i = 0; i < adev->usec_timeout; i++) {
4918                 if (gfx_v8_0_is_idle(handle))
4919                         return 0;
4920
4921                 udelay(1);
4922         }
4923         return -ETIMEDOUT;
4924 }
4925
/*
 * gfx_v8_0_hw_fini - tear down the gfx block
 *
 * Releases the gfx interrupt references, unmaps the compute queues, and —
 * except under SRIOV, where the host owns the hardware — halts the CP and
 * RLC once they go idle.  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	gfx_v8_0_kcq_disable(adev);

	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	/* halt CP and RLC under safe mode; skip either if it never goes idle */
	amdgpu_gfx_rlc_enter_safe_mode(adev);
	if (!gfx_v8_0_wait_for_idle(adev))
		gfx_v8_0_cp_enable(adev, false);
	else
		pr_err("cp is busy, skip halt cp\n");
	if (!gfx_v8_0_wait_for_rlc_idle(adev))
		adev->gfx.rlc.funcs->stop(adev);
	else
		pr_err("rlc is busy, skip halt rlc\n");
	amdgpu_gfx_rlc_exit_safe_mode(adev);

	return 0;
}
4957
/* Suspend is simply a full hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_fini(adev);
}
4962
/* Resume is simply a full hardware re-init. */
static int gfx_v8_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_init(adev);
}
4967
4968 static bool gfx_v8_0_check_soft_reset(void *handle)
4969 {
4970         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4971         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4972         u32 tmp;
4973
4974         /* GRBM_STATUS */
4975         tmp = RREG32(mmGRBM_STATUS);
4976         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4977                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4978                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4979                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4980                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4981                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4982                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4983                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4984                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4985                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4986                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4987                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4988                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4989         }
4990
4991         /* GRBM_STATUS2 */
4992         tmp = RREG32(mmGRBM_STATUS2);
4993         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4994                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4995                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4996
4997         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4998             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4999             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5000                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5001                                                 SOFT_RESET_CPF, 1);
5002                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5003                                                 SOFT_RESET_CPC, 1);
5004                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5005                                                 SOFT_RESET_CPG, 1);
5006                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5007                                                 SOFT_RESET_GRBM, 1);
5008         }
5009
5010         /* SRBM_STATUS */
5011         tmp = RREG32(mmSRBM_STATUS);
5012         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5013                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5014                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5015         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5016                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5017                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5018
5019         if (grbm_soft_reset || srbm_soft_reset) {
5020                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5021                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5022                 return true;
5023         } else {
5024                 adev->gfx.grbm_soft_reset = 0;
5025                 adev->gfx.srbm_soft_reset = 0;
5026                 return false;
5027         }
5028 }
5029
5030 static int gfx_v8_0_pre_soft_reset(void *handle)
5031 {
5032         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5033         u32 grbm_soft_reset = 0;
5034
5035         if ((!adev->gfx.grbm_soft_reset) &&
5036             (!adev->gfx.srbm_soft_reset))
5037                 return 0;
5038
5039         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5040
5041         /* stop the rlc */
5042         adev->gfx.rlc.funcs->stop(adev);
5043
5044         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5045             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5046                 /* Disable GFX parsing/prefetching */
5047                 gfx_v8_0_cp_gfx_enable(adev, false);
5048
5049         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5050             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5051             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5052             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5053                 int i;
5054
5055                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5056                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5057
5058                         mutex_lock(&adev->srbm_mutex);
5059                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5060                         gfx_v8_0_deactivate_hqd(adev, 2);
5061                         vi_srbm_select(adev, 0, 0, 0, 0);
5062                         mutex_unlock(&adev->srbm_mutex);
5063                 }
5064                 /* Disable MEC parsing/prefetching */
5065                 gfx_v8_0_cp_compute_enable(adev, false);
5066         }
5067
5068        return 0;
5069 }
5070
/*
 * gfx_v8_0_soft_reset - pulse the GRBM/SRBM soft-reset bits
 *
 * Uses the reset masks cached by gfx_v8_0_check_soft_reset().  The sequence
 * is: stall GFX via GMCON_DEBUG, assert then deassert the reset bits with
 * readbacks and 50us settle delays, then release the GMCON stall.
 * The exact write/readback/udelay ordering is a hardware requirement;
 * do not reorder.  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall and clear the gfx memory controller path before resetting */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		/* readback to post the write before the delay */
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		/* readback to post the write before the delay */
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall now that the reset has been pulsed */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5132
5133 static int gfx_v8_0_post_soft_reset(void *handle)
5134 {
5135         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5136         u32 grbm_soft_reset = 0;
5137
5138         if ((!adev->gfx.grbm_soft_reset) &&
5139             (!adev->gfx.srbm_soft_reset))
5140                 return 0;
5141
5142         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5143
5144         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5145             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5146             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5147             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5148                 int i;
5149
5150                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5151                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5152
5153                         mutex_lock(&adev->srbm_mutex);
5154                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5155                         gfx_v8_0_deactivate_hqd(adev, 2);
5156                         vi_srbm_select(adev, 0, 0, 0, 0);
5157                         mutex_unlock(&adev->srbm_mutex);
5158                 }
5159                 gfx_v8_0_kiq_resume(adev);
5160                 gfx_v8_0_kcq_resume(adev);
5161         }
5162
5163         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5164             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5165                 gfx_v8_0_cp_gfx_resume(adev);
5166
5167         gfx_v8_0_cp_test_all_rings(adev);
5168
5169         adev->gfx.rlc.funcs->start(adev);
5170
5171         return 0;
5172 }
5173
5174 /**
5175  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5176  *
5177  * @adev: amdgpu_device pointer
5178  *
5179  * Fetches a GPU clock counter snapshot.
5180  * Returns the 64 bit clock counter snapshot.
5181  */
5182 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5183 {
5184         uint64_t clock;
5185
5186         mutex_lock(&adev->gfx.gpu_clock_mutex);
5187         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5188         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5189                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5190         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5191         return clock;
5192 }
5193
/*
 * gfx_v8_0_ring_emit_gds_switch - emit GDS/GWS/OA programming for a VMID
 *
 * Emits four WRITE_DATA packets that program the per-VMID GDS base and
 * size, the GWS base/size pair, and the OA allocation mask.  Each packet
 * is 5 dwords: header, control, register offset, 0 (hi addr), value.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size in the upper field, base in the lower */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5232
5233 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5234 {
5235         WREG32(mmSQ_IND_INDEX,
5236                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5237                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5238                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5239                 (SQ_IND_INDEX__FORCE_READ_MASK));
5240         return RREG32(mmSQ_IND_DATA);
5241 }
5242
5243 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5244                            uint32_t wave, uint32_t thread,
5245                            uint32_t regno, uint32_t num, uint32_t *out)
5246 {
5247         WREG32(mmSQ_IND_INDEX,
5248                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5249                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5250                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5251                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5252                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5253                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5254         while (num--)
5255                 *(out++) = RREG32(mmSQ_IND_DATA);
5256 }
5257
5258 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5259 {
5260         /* type 0 wave data */
5261         dst[(*no_fields)++] = 0;
5262         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5263         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5264         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5265         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5266         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5267         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5268         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5269         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5270         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5271         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5272         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5273         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5274         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5275         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5276         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5277         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5278         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5279         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5280 }
5281
5282 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5283                                      uint32_t wave, uint32_t start,
5284                                      uint32_t size, uint32_t *dst)
5285 {
5286         wave_read_regs(
5287                 adev, simd, wave, 0,
5288                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5289 }
5290
5291
/* GFX v8 dispatch table: clock snapshot, SE/SH selection and wave dumping. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5299
5300 static int gfx_v8_0_early_init(void *handle)
5301 {
5302         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5303
5304         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5305         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5306         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5307         gfx_v8_0_set_ring_funcs(adev);
5308         gfx_v8_0_set_irq_funcs(adev);
5309         gfx_v8_0_set_gds_init(adev);
5310         gfx_v8_0_set_rlc_funcs(adev);
5311
5312         return 0;
5313 }
5314
5315 static int gfx_v8_0_late_init(void *handle)
5316 {
5317         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5318         int r;
5319
5320         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5321         if (r)
5322                 return r;
5323
5324         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5325         if (r)
5326                 return r;
5327
5328         /* requires IBs so do in late init after IB pool is initialized */
5329         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5330         if (r)
5331                 return r;
5332
5333         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5334         if (r) {
5335                 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5336                 return r;
5337         }
5338
5339         r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5340         if (r) {
5341                 DRM_ERROR(
5342                         "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5343                         r);
5344                 return r;
5345         }
5346
5347         return 0;
5348 }
5349
5350 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5351                                                        bool enable)
5352 {
5353         if (((adev->asic_type == CHIP_POLARIS11) ||
5354             (adev->asic_type == CHIP_POLARIS12) ||
5355             (adev->asic_type == CHIP_VEGAM)) &&
5356             adev->powerplay.pp_funcs->set_powergating_by_smu)
5357                 /* Send msg to SMU via Powerplay */
5358                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5359
5360         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5361 }
5362
5363 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5364                                                         bool enable)
5365 {
5366         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5367 }
5368
5369 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5370                 bool enable)
5371 {
5372         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5373 }
5374
5375 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5376                                           bool enable)
5377 {
5378         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5379 }
5380
5381 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5382                                                 bool enable)
5383 {
5384         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5385
5386         /* Read any GFX register to wake up GFX. */
5387         if (!enable)
5388                 RREG32(mmDB_RENDER_CONTROL);
5389 }
5390
5391 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5392                                           bool enable)
5393 {
5394         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5395                 cz_enable_gfx_cg_power_gating(adev, true);
5396                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5397                         cz_enable_gfx_pipeline_power_gating(adev, true);
5398         } else {
5399                 cz_enable_gfx_cg_power_gating(adev, false);
5400                 cz_enable_gfx_pipeline_power_gating(adev, false);
5401         }
5402 }
5403
/*
 * gfx_v8_0_set_powergating_state - apply the requested power-gating state
 *
 * Per-ASIC power-gating policy.  Under SRIOV the host owns power gating,
 * so this is a no-op.  When any of SMG/DMG/CP/RLC_SMU_HS gating is
 * supported, the RLC safe mode brackets the whole register sequence.
 * Always returns 0.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down tracks RLC_SMU_HS support unconditionally */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		/* static/dynamic MG gating needs both hw support and gating on */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_exit_safe_mode(adev);
	return 0;
}
5474
/*
 * gfx_v8_0_get_clockgating_state - report active gfx clockgating features
 *
 * Reads the clockgating control registers and ORs the corresponding
 * AMD_CG_SUPPORT_GFX_* bits into *flags.
 *
 * NOTE(review): the SRIOV path zeroes *flags but falls through and keeps
 * reading registers rather than returning — confirm this is intended.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGLG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5516
/*
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM command over the RLC serdes.
 * @adev: amdgpu device
 * @reg_addr: BPM register to target (e.g. BPM_REG_MGCG_OVERRIDE)
 * @cmd: serdes command (SET_BPM_SERDES_CMD / CLE_BPM_SERDES_CMD)
 *
 * Selects all SEs/SHs/CUs (0xffffffff broadcast), enables every CU and
 * non-CU master, then rewrites RLC_SERDES_WR_CTRL with the command,
 * target address and an all-CUs BPM address.  Stoney leaves the
 * BPM_DATA/REG_ADDR fields out of the cleared mask; other ASICs clear
 * them too before the new values are OR-ed back in.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to every SE/SH/CU */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* insert command, target register and all-CUs BPM address */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5557
/* RLC safe-mode request messages plus locally defined RLC_GPR_REG2
 * request/message field masks.  NOTE(review): neither the MSG_* values
 * nor the GPR_REG2 masks are referenced by the code visible here —
 * presumably used by other gfx_v8 paths; confirm before removing.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5564
5565 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5566 {
5567         uint32_t rlc_setting;
5568
5569         rlc_setting = RREG32(mmRLC_CNTL);
5570         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5571                 return false;
5572
5573         return true;
5574 }
5575
/*
 * gfx_v8_0_set_safe_mode - request RLC safe mode and wait for the
 * hardware to acknowledge.
 *
 * Writes RLC_SAFE_MODE with CMD=1 and MESSAGE=1 (enter), then polls for
 * the GFX clock and power status to report on, and finally waits for the
 * RLC to clear the CMD bit as the acknowledgement.  Both polls are
 * bounded by adev->usec_timeout.
 */
static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;
	/* NOTE(review): the request is built from an mmRLC_CNTL read but
	 * masked with RLC_SAFE_MODE fields and written to mmRLC_SAFE_MODE —
	 * the layouts appear compatible; confirm against the register spec.
	 */
	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
	WREG32(mmRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE: both GFX clock and power status on */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPM_STAT) &
		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
			break;
		udelay(1);
	}
	/* wait for the RLC to ack the request by clearing CMD */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5602
/*
 * gfx_v8_0_unset_safe_mode - request exit from RLC safe mode.
 *
 * Writes RLC_SAFE_MODE with CMD=1 and MESSAGE=0 (exit), then polls up to
 * adev->usec_timeout for the RLC to clear the CMD bit as acknowledgement.
 */
static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	WREG32(mmRLC_SAFE_MODE, data);

	/* wait for the RLC to ack by clearing the CMD bit */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5619
5620 static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5621 {
5622         u32 data;
5623
5624         data = RREG32(mmRLC_SPM_VMID);
5625
5626         data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5627         data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5628
5629         WREG32(mmRLC_SPM_VMID, data);
5630 }
5631
/* RLC callback table for the gfx v8 IP block: safe-mode entry/exit,
 * init/resume/stop/reset/start lifecycle, CSB helpers and SPM VMID
 * selection, all backed by the gfx_v8_0_* implementations above. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
	.set_safe_mode = gfx_v8_0_set_safe_mode,
	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
	.init = gfx_v8_0_rlc_init,
	.get_csb_size = gfx_v8_0_get_csb_size,
	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
	.resume = gfx_v8_0_rlc_resume,
	.stop = gfx_v8_0_rlc_stop,
	.reset = gfx_v8_0_rlc_reset,
	.start = gfx_v8_0_rlc_start,
	.update_spm_vmid = gfx_v8_0_update_spm_vmid
};
5646
/*
 * gfx_v8_0_update_medium_grain_clock_gating - enable/disable medium grain
 * clock gating (MGCG) together with RLC/CP memory light sleep and CGTS
 * tree-shade gating.
 * @adev: amdgpu device
 * @enable: true to enable gating, false to disable
 *
 * Runs entirely under RLC safe mode; the enable and disable paths follow
 * the numbered hardware sequences in the inline comments, and register
 * writes are skipped when the value is unchanged (temp != data checks).
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: clear the overrides so MGCG can
		 * engage; APUs keep the GRBM override set. */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* LS override is only released when both MGLS and
			 * CGTS_LS are supported */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5750
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - enable/disable coarse grain
 * clock gating (CGCG) and, when supported, coarse grain light sleep (CGLS).
 * @adev: amdgpu device
 * @enable: true to enable gating, false to disable
 *
 * Runs under RLC safe mode.  The enable path clears the CGCG/CGLS
 * overrides via serdes commands before setting the enable bits; the
 * disable path masks GUI-idle interrupts, forces the overrides back on,
 * then clears both enable bits.  RLC_CGCG_CGLS_CTRL is only rewritten
 * when its value actually changes.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* also clear the CGLS override */
			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5843 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5844                                             bool enable)
5845 {
5846         if (enable) {
5847                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5848                  * ===  MGCG + MGLS + TS(CG/LS) ===
5849                  */
5850                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5851                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5852         } else {
5853                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5854                  * ===  CGCG + CGLS ===
5855                  */
5856                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5857                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5858         }
5859         return 0;
5860 }
5861
/*
 * gfx_v8_0_tonga_update_gfx_clock_gating - program GFX clockgating on
 * Tonga by sending CG/MG requests to the SMU.
 * @adev: amdgpu device
 * @state: AMD_CG_STATE_GATE or AMD_CG_STATE_UNGATE
 *
 * For each supported block (coarse grain CG, medium grain MG) a
 * PP_CG_MSG_ID message is built from the supported-feature mask and the
 * requested state (forced to 0 when ungating) and forwarded through the
 * powerplay set_clockgating_by_smu callback.
 *
 * NOTE(review): only the callback pointer is NULL-checked;
 * adev->powerplay.pp_funcs itself is assumed non-NULL here.
 *
 * Returns 0 always.
 */
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
					  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	/* coarse grain clockgating (CGCG/CGLS) */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	/* medium grain clockgating (MGCG/MGLS) */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
5912
/*
 * gfx_v8_0_polaris_update_gfx_clock_gating - program GFX clockgating on
 * Polaris-family parts (and VEGAM) via SMU messages.
 * @adev: amdgpu device
 * @state: AMD_CG_STATE_GATE or AMD_CG_STATE_UNGATE
 *
 * Same message scheme as the Tonga variant, but covers additional blocks:
 * coarse grain CG, 3D CG, medium grain MG, RLC light sleep and CP light
 * sleep.  For each supported block the feature mask and requested state
 * (forced to 0 on ungate) are packed with PP_CG_MSG_ID and forwarded
 * through the powerplay set_clockgating_by_smu callback.
 *
 * NOTE(review): only the callback pointer is NULL-checked;
 * adev->powerplay.pp_funcs itself is assumed non-NULL here.
 *
 * Returns 0 always.
 */
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
					  enum amd_clockgating_state state)
{

	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	/* coarse grain clockgating (CGCG/CGLS) */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	/* 3D coarse grain clockgating */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_3D,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	/* medium grain clockgating (MGCG/MGLS) */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	/* RLC memory light sleep */
	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_RLC,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	/* CP memory light sleep */
	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			PP_BLOCK_GFX_CP,
			pp_support_state,
			pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
6015
6016 static int gfx_v8_0_set_clockgating_state(void *handle,
6017                                           enum amd_clockgating_state state)
6018 {
6019         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6020
6021         if (amdgpu_sriov_vf(adev))
6022                 return 0;
6023
6024         switch (adev->asic_type) {
6025         case CHIP_FIJI:
6026         case CHIP_CARRIZO:
6027         case CHIP_STONEY:
6028                 gfx_v8_0_update_gfx_clock_gating(adev,
6029                                                  state == AMD_CG_STATE_GATE);
6030                 break;
6031         case CHIP_TONGA:
6032                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6033                 break;
6034         case CHIP_POLARIS10:
6035         case CHIP_POLARIS11:
6036         case CHIP_POLARIS12:
6037         case CHIP_VEGAM:
6038                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6039                 break;
6040         default:
6041                 break;
6042         }
6043         return 0;
6044 }
6045
6046 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6047 {
6048         return ring->adev->wb.wb[ring->rptr_offs];
6049 }
6050
6051 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6052 {
6053         struct amdgpu_device *adev = ring->adev;
6054
6055         if (ring->use_doorbell)
6056                 /* XXX check if swapping is necessary on BE */
6057                 return ring->adev->wb.wb[ring->wptr_offs];
6058         else
6059                 return RREG32(mmCP_RB0_WPTR);
6060 }
6061
6062 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6063 {
6064         struct amdgpu_device *adev = ring->adev;
6065
6066         if (ring->use_doorbell) {
6067                 /* XXX check if swapping is necessary on BE */
6068                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6069                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6070         } else {
6071                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6072                 (void)RREG32(mmCP_RB0_WPTR);
6073         }
6074 }
6075
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit a WAIT_REG_MEM packet that
 * requests an HDP flush and waits for its completion bit.
 *
 * Compute/KIQ rings use the per-ME/pipe CP2/CP6 done bits (shifted by
 * pipe); any ME other than 1 or 2 emits nothing.  GFX rings use CP0 and
 * run the wait on the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* no HDP flush-done bit for this ME */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	/* write REQ, then poll DONE until (DONE & mask) == ref */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6108
/*
 * gfx_v8_0_ring_emit_vgt_flush - flush the VGT by emitting a
 * VS_PARTIAL_FLUSH event followed by a VGT_FLUSH event.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6119
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the GFX ring.
 * @ring: GFX ring
 * @job: job the IB belongs to (supplies the VMID), may be NULL
 * @ib: indirect buffer to chain to
 * @flags: emit flags (unused here)
 *
 * CE IBs are emitted with INDIRECT_BUFFER_CONST, DE IBs with
 * INDIRECT_BUFFER.  Under SR-IOV, preemptible IBs get the PRE_ENB bit,
 * and preemptible DE IBs with a VMID are preceded by DE metadata.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
					struct amdgpu_job *job,
					struct amdgpu_ib *ib,
					uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6151
/*
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute
 * ring, optionally resetting the GDS max-wave-ID first (see the
 * workaround comment below).
 * @ring: compute ring
 * @job: job the IB belongs to (supplies the VMID), may be NULL
 * @ib: indirect buffer to chain to
 * @flags: emit flags (unused here)
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	/* Currently, there is a high possibility to get wave ID mismatch
	 * between ME and GDS, leading to a hw deadlock, because ME generates
	 * different wave IDs than the GDS expects. This situation happens
	 * randomly when at least 5 compute pipes use GDS ordered append.
	 * The wave IDs generated by ME are also wrong after suspend/resume.
	 * Those are probably bugs somewhere else in the kernel driver.
	 *
	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
	 * GDS to 0 for this ring (me/pipe).
	 */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6185
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the GFX ring.
 * @ring: ring to emit on
 * @addr: GPU address the fence value is written to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT and/or AMDGPU_FENCE_FLAG_INT
 *
 * Emits two EVENT_WRITE_EOP packets: first a dummy one writing seq - 1
 * with no interrupt (cache flush workaround), then the real one writing
 * seq with the data width and interrupt selection requested in @flags.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
	amdgpu_ring_write(ring, upper_32_bits(seq - 1));

	/* Then send the real EOP event down the pipe:
	 * EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6222
6223 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6224 {
6225         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6226         uint32_t seq = ring->fence_drv.sync_seq;
6227         uint64_t addr = ring->fence_drv.gpu_addr;
6228
6229         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6230         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6231                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6232                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6233         amdgpu_ring_write(ring, addr & 0xfffffffc);
6234         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6235         amdgpu_ring_write(ring, seq);
6236         amdgpu_ring_write(ring, 0xffffffff);
6237         amdgpu_ring_write(ring, 4); /* poll interval */
6238 }
6239
6240 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6241                                         unsigned vmid, uint64_t pd_addr)
6242 {
6243         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6244
6245         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6246
6247         /* wait for the invalidate to complete */
6248         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6249         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6250                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6251                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6252         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6253         amdgpu_ring_write(ring, 0);
6254         amdgpu_ring_write(ring, 0); /* ref */
6255         amdgpu_ring_write(ring, 0); /* mask */
6256         amdgpu_ring_write(ring, 0x20); /* poll interval */
6257
6258         /* compute doesn't have PFP */
6259         if (usepfp) {
6260                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6261                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6262                 amdgpu_ring_write(ring, 0x0);
6263         }
6264 }
6265
6266 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6267 {
6268         return ring->adev->wb.wb[ring->wptr_offs];
6269 }
6270
6271 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6272 {
6273         struct amdgpu_device *adev = ring->adev;
6274
6275         /* XXX check if swapping is necessary on BE */
6276         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6277         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6278 }
6279
6280 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6281                                              u64 addr, u64 seq,
6282                                              unsigned flags)
6283 {
6284         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6285         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6286
6287         /* RELEASE_MEM - flush caches, send int */
6288         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6289         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6290                                  EOP_TC_ACTION_EN |
6291                                  EOP_TC_WB_ACTION_EN |
6292                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6293                                  EVENT_INDEX(5)));
6294         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6295         amdgpu_ring_write(ring, addr & 0xfffffffc);
6296         amdgpu_ring_write(ring, upper_32_bits(addr));
6297         amdgpu_ring_write(ring, lower_32_bits(seq));
6298         amdgpu_ring_write(ring, upper_32_bits(seq));
6299 }
6300
6301 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6302                                          u64 seq, unsigned int flags)
6303 {
6304         /* we only allocate 32bit for each seq wb address */
6305         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6306
6307         /* write fence seq to the "addr" */
6308         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6309         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6310                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6311         amdgpu_ring_write(ring, lower_32_bits(addr));
6312         amdgpu_ring_write(ring, upper_32_bits(addr));
6313         amdgpu_ring_write(ring, lower_32_bits(seq));
6314
6315         if (flags & AMDGPU_FENCE_FLAG_INT) {
6316                 /* set register to trigger INT */
6317                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6318                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6319                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6320                 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6321                 amdgpu_ring_write(ring, 0);
6322                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6323         }
6324 }
6325
6326 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6327 {
6328         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6329         amdgpu_ring_write(ring, 0);
6330 }
6331
6332 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6333 {
6334         uint32_t dw2 = 0;
6335
6336         if (amdgpu_sriov_vf(ring->adev))
6337                 gfx_v8_0_ring_emit_ce_meta(ring);
6338
6339         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6340         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6341                 gfx_v8_0_ring_emit_vgt_flush(ring);
6342                 /* set load_global_config & load_global_uconfig */
6343                 dw2 |= 0x8001;
6344                 /* set load_cs_sh_regs */
6345                 dw2 |= 0x01000000;
6346                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6347                 dw2 |= 0x10002;
6348
6349                 /* set load_ce_ram if preamble presented */
6350                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6351                         dw2 |= 0x10000000;
6352         } else {
6353                 /* still load_ce_ram if this is the first time preamble presented
6354                  * although there is no context switch happens.
6355                  */
6356                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6357                         dw2 |= 0x10000000;
6358         }
6359
6360         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6361         amdgpu_ring_write(ring, dw2);
6362         amdgpu_ring_write(ring, 0);
6363 }
6364
6365 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6366 {
6367         unsigned ret;
6368
6369         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6370         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6371         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6372         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6373         ret = ring->wptr & ring->buf_mask;
6374         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6375         return ret;
6376 }
6377
6378 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6379 {
6380         unsigned cur;
6381
6382         BUG_ON(offset > ring->buf_mask);
6383         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6384
6385         cur = (ring->wptr & ring->buf_mask) - 1;
6386         if (likely(cur > offset))
6387                 ring->ring[offset] = cur - offset;
6388         else
6389                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6390 }
6391
6392 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6393 {
6394         struct amdgpu_device *adev = ring->adev;
6395         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
6396
6397         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6398         amdgpu_ring_write(ring, 0 |     /* src: register*/
6399                                 (5 << 8) |      /* dst: memory */
6400                                 (1 << 20));     /* write confirm */
6401         amdgpu_ring_write(ring, reg);
6402         amdgpu_ring_write(ring, 0);
6403         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6404                                 kiq->reg_val_offs * 4));
6405         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6406                                 kiq->reg_val_offs * 4));
6407 }
6408
6409 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6410                                   uint32_t val)
6411 {
6412         uint32_t cmd;
6413
6414         switch (ring->funcs->type) {
6415         case AMDGPU_RING_TYPE_GFX:
6416                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6417                 break;
6418         case AMDGPU_RING_TYPE_KIQ:
6419                 cmd = 1 << 16; /* no inc addr */
6420                 break;
6421         default:
6422                 cmd = WR_CONFIRM;
6423                 break;
6424         }
6425
6426         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6427         amdgpu_ring_write(ring, cmd);
6428         amdgpu_ring_write(ring, reg);
6429         amdgpu_ring_write(ring, 0);
6430         amdgpu_ring_write(ring, val);
6431 }
6432
6433 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6434 {
6435         struct amdgpu_device *adev = ring->adev;
6436         uint32_t value = 0;
6437
6438         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6439         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6440         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6441         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6442         WREG32(mmSQ_CMD, value);
6443 }
6444
6445 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6446                                                  enum amdgpu_interrupt_state state)
6447 {
6448         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6449                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6450 }
6451
6452 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6453                                                      int me, int pipe,
6454                                                      enum amdgpu_interrupt_state state)
6455 {
6456         u32 mec_int_cntl, mec_int_cntl_reg;
6457
6458         /*
6459          * amdgpu controls only the first MEC. That's why this function only
6460          * handles the setting of interrupts for this specific MEC. All other
6461          * pipes' interrupts are set by amdkfd.
6462          */
6463
6464         if (me == 1) {
6465                 switch (pipe) {
6466                 case 0:
6467                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6468                         break;
6469                 case 1:
6470                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6471                         break;
6472                 case 2:
6473                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6474                         break;
6475                 case 3:
6476                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6477                         break;
6478                 default:
6479                         DRM_DEBUG("invalid pipe %d\n", pipe);
6480                         return;
6481                 }
6482         } else {
6483                 DRM_DEBUG("invalid me %d\n", me);
6484                 return;
6485         }
6486
6487         switch (state) {
6488         case AMDGPU_IRQ_STATE_DISABLE:
6489                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6490                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6491                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6492                 break;
6493         case AMDGPU_IRQ_STATE_ENABLE:
6494                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6495                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6496                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6497                 break;
6498         default:
6499                 break;
6500         }
6501 }
6502
6503 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6504                                              struct amdgpu_irq_src *source,
6505                                              unsigned type,
6506                                              enum amdgpu_interrupt_state state)
6507 {
6508         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6509                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6510
6511         return 0;
6512 }
6513
6514 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6515                                               struct amdgpu_irq_src *source,
6516                                               unsigned type,
6517                                               enum amdgpu_interrupt_state state)
6518 {
6519         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6520                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6521
6522         return 0;
6523 }
6524
6525 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6526                                             struct amdgpu_irq_src *src,
6527                                             unsigned type,
6528                                             enum amdgpu_interrupt_state state)
6529 {
6530         switch (type) {
6531         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6532                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6533                 break;
6534         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6535                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6536                 break;
6537         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6538                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6539                 break;
6540         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6541                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6542                 break;
6543         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6544                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6545                 break;
6546         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6547                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6548                 break;
6549         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6550                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6551                 break;
6552         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6553                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6554                 break;
6555         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6556                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6557                 break;
6558         default:
6559                 break;
6560         }
6561         return 0;
6562 }
6563
6564 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6565                                          struct amdgpu_irq_src *source,
6566                                          unsigned int type,
6567                                          enum amdgpu_interrupt_state state)
6568 {
6569         int enable_flag;
6570
6571         switch (state) {
6572         case AMDGPU_IRQ_STATE_DISABLE:
6573                 enable_flag = 0;
6574                 break;
6575
6576         case AMDGPU_IRQ_STATE_ENABLE:
6577                 enable_flag = 1;
6578                 break;
6579
6580         default:
6581                 return -EINVAL;
6582         }
6583
6584         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6585         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6586         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6587         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6588         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6589         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6590                      enable_flag);
6591         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6592                      enable_flag);
6593         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6594                      enable_flag);
6595         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6596                      enable_flag);
6597         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6598                      enable_flag);
6599         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6600                      enable_flag);
6601         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6602                      enable_flag);
6603         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6604                      enable_flag);
6605
6606         return 0;
6607 }
6608
6609 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6610                                      struct amdgpu_irq_src *source,
6611                                      unsigned int type,
6612                                      enum amdgpu_interrupt_state state)
6613 {
6614         int enable_flag;
6615
6616         switch (state) {
6617         case AMDGPU_IRQ_STATE_DISABLE:
6618                 enable_flag = 1;
6619                 break;
6620
6621         case AMDGPU_IRQ_STATE_ENABLE:
6622                 enable_flag = 0;
6623                 break;
6624
6625         default:
6626                 return -EINVAL;
6627         }
6628
6629         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6630                      enable_flag);
6631
6632         return 0;
6633 }
6634
6635 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6636                             struct amdgpu_irq_src *source,
6637                             struct amdgpu_iv_entry *entry)
6638 {
6639         int i;
6640         u8 me_id, pipe_id, queue_id;
6641         struct amdgpu_ring *ring;
6642
6643         DRM_DEBUG("IH: CP EOP\n");
6644         me_id = (entry->ring_id & 0x0c) >> 2;
6645         pipe_id = (entry->ring_id & 0x03) >> 0;
6646         queue_id = (entry->ring_id & 0x70) >> 4;
6647
6648         switch (me_id) {
6649         case 0:
6650                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6651                 break;
6652         case 1:
6653         case 2:
6654                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6655                         ring = &adev->gfx.compute_ring[i];
6656                         /* Per-queue interrupt is supported for MEC starting from VI.
6657                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6658                           */
6659                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6660                                 amdgpu_fence_process(ring);
6661                 }
6662                 break;
6663         }
6664         return 0;
6665 }
6666
6667 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6668                            struct amdgpu_iv_entry *entry)
6669 {
6670         u8 me_id, pipe_id, queue_id;
6671         struct amdgpu_ring *ring;
6672         int i;
6673
6674         me_id = (entry->ring_id & 0x0c) >> 2;
6675         pipe_id = (entry->ring_id & 0x03) >> 0;
6676         queue_id = (entry->ring_id & 0x70) >> 4;
6677
6678         switch (me_id) {
6679         case 0:
6680                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6681                 break;
6682         case 1:
6683         case 2:
6684                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6685                         ring = &adev->gfx.compute_ring[i];
6686                         if (ring->me == me_id && ring->pipe == pipe_id &&
6687                             ring->queue == queue_id)
6688                                 drm_sched_fault(&ring->sched);
6689                 }
6690                 break;
6691         }
6692 }
6693
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	/* report and hand off to common fault handling */
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
6702
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	/* report and hand off to common fault handling */
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}
6711
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* Nothing to recover here; just report the event.
	 * Fix: log message was missing the trailing newline required by
	 * kernel printk convention.
	 */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6719
6720 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6721 {
6722         u32 enc, se_id, sh_id, cu_id;
6723         char type[20];
6724         int sq_edc_source = -1;
6725
6726         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6727         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6728
6729         switch (enc) {
6730                 case 0:
6731                         DRM_INFO("SQ general purpose intr detected:"
6732                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6733                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6734                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6735                                         "wlt %d, thread_trace %d.\n",
6736                                         se_id,
6737                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6738                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6739                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6740                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6741                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6742                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6743                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6744                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6745                                         );
6746                         break;
6747                 case 1:
6748                 case 2:
6749
6750                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6751                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6752
6753                         /*
6754                          * This function can be called either directly from ISR
6755                          * or from BH in which case we can access SQ_EDC_INFO
6756                          * instance
6757                          */
6758                         if (in_task()) {
6759                                 mutex_lock(&adev->grbm_idx_mutex);
6760                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6761
6762                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6763
6764                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6765                                 mutex_unlock(&adev->grbm_idx_mutex);
6766                         }
6767
6768                         if (enc == 1)
6769                                 sprintf(type, "instruction intr");
6770                         else
6771                                 sprintf(type, "EDC/ECC error");
6772
6773                         DRM_INFO(
6774                                 "SQ %s detected: "
6775                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6776                                         "trap %s, sq_ed_info.source %s.\n",
6777                                         type, se_id, sh_id, cu_id,
6778                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6779                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6780                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6781                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6782                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6783                                 );
6784                         break;
6785                 default:
6786                         DRM_ERROR("SQ invalid encoding type\n.");
6787         }
6788 }
6789
6790 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6791 {
6792
6793         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6794         struct sq_work *sq_work = container_of(work, struct sq_work, work);
6795
6796         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6797 }
6798
6799 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6800                            struct amdgpu_irq_src *source,
6801                            struct amdgpu_iv_entry *entry)
6802 {
6803         unsigned ih_data = entry->src_data[0];
6804
6805         /*
6806          * Try to submit work so SQ_EDC_INFO can be accessed from
6807          * BH. If previous work submission hasn't finished yet
6808          * just print whatever info is possible directly from the ISR.
6809          */
6810         if (work_pending(&adev->gfx.sq_work.work)) {
6811                 gfx_v8_0_parse_sq_irq(adev, ih_data);
6812         } else {
6813                 adev->gfx.sq_work.ih_data = ih_data;
6814                 schedule_work(&adev->gfx.sq_work.work);
6815         }
6816
6817         return 0;
6818 }
6819
/* IP-block callbacks wiring GFX v8 into the common amdgpu IP framework:
 * lifecycle (init/fini, hw bring-up/teardown, suspend/resume), idle and
 * soft-reset handling, and clock/power-gating control.
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6840
/* Ring callbacks for the GFX (graphics) ring.  emit_frame_size is the
 * worst-case DW budget for one frame's ring-level packets (see per-line
 * breakdown below); emit_ib_size is per indirect buffer.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		12 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		12 + 12 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
};
6885
/* Ring callbacks for the compute (MEC) rings.  Uses doorbell-based wptr
 * via the writeback slot and RELEASE_MEM fences instead of the gfx EOP
 * workaround.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6914
/* Ring function table for the Kernel Interface Queue (KIQ).
 * The KIQ only carries driver-internal packets (register access, queue
 * mapping), so it provides no .emit_ib/.test_ib and adds .emit_rreg for
 * register reads through the ring.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
        .type = AMDGPU_RING_TYPE_KIQ,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
        .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
        .test_ring = gfx_v8_0_ring_test_ring,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_rreg = gfx_v8_0_ring_emit_rreg,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6938
6939 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6940 {
6941         int i;
6942
6943         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6944
6945         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6946                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6947
6948         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6949                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6950 }
6951
/* End-of-pipe (fence) interrupt: enable/disable and handler hooks. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
        .set = gfx_v8_0_set_eop_interrupt_state,
        .process = gfx_v8_0_eop_irq,
};
6956
/* Privileged register access fault interrupt hooks. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
        .set = gfx_v8_0_set_priv_reg_fault_state,
        .process = gfx_v8_0_priv_reg_irq,
};
6961
/* Privileged instruction fault interrupt hooks. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
        .set = gfx_v8_0_set_priv_inst_fault_state,
        .process = gfx_v8_0_priv_inst_irq,
};
6966
/* CP ECC error interrupt hooks. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
        .set = gfx_v8_0_set_cp_ecc_int_state,
        .process = gfx_v8_0_cp_ecc_error_irq,
};
6971
/* SQ (shader sequencer) interrupt hooks. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
        .set = gfx_v8_0_set_sq_int_state,
        .process = gfx_v8_0_sq_irq,
};
6976
6977 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6978 {
6979         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6980         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6981
6982         adev->gfx.priv_reg_irq.num_types = 1;
6983         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6984
6985         adev->gfx.priv_inst_irq.num_types = 1;
6986         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6987
6988         adev->gfx.cp_ecc_error_irq.num_types = 1;
6989         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
6990
6991         adev->gfx.sq_irq.num_types = 1;
6992         adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
6993 }
6994
/* Hook up the RLC ops; GFX8 reuses the Iceland (VI) RLC function table. */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
        adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
6999
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
        /* init ASIC GDS info: read the hardware-reported GDS memory size,
         * use fixed GWS/OA counts for this generation, and record the max
         * compute wave id for GDS partitioning.
         */
        adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
        adev->gds.gws_size = 64;
        adev->gds.oa_size = 16;
        adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
}
7008
7009 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7010                                                  u32 bitmap)
7011 {
7012         u32 data;
7013
7014         if (!bitmap)
7015                 return;
7016
7017         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7018         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7019
7020         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7021 }
7022
7023 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7024 {
7025         u32 data, mask;
7026
7027         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7028                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7029
7030         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7031
7032         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7033 }
7034
/* Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active CU count, and the always-on (AO) CU mask used by power gating.
 * Walks every shader engine / shader array under grbm_idx_mutex since it
 * reprograms GRBM SE/SH selection while reading per-SH registers.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
        int i, j, k, counter, active_cu_number = 0;
        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
        /* One user disable mask per (SE, SH); module-param layout is 4 SEs x 2 SHs. */
        unsigned disable_masks[4 * 2];
        u32 ao_cu_num;

        memset(cu_info, 0, sizeof(*cu_info));

        /* APUs keep fewer CUs always-on than discrete parts. */
        if (adev->flags & AMD_IS_APU)
                ao_cu_num = 2;
        else
                ao_cu_num = adev->gfx.config.max_cu_per_sh;

        amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
                        /* Select this SE/SH so register reads below hit it. */
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        /* disable_masks only covers 4 SEs x 2 SHs; guard the index. */
                        if (i < 4 && j < 2)
                                gfx_v8_0_set_user_cu_inactive_bitmap(
                                        adev, disable_masks[i * 2 + j]);
                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
                        cu_info->bitmap[i][j] = bitmap;

                        /* Count active CUs; the first ao_cu_num of them become AO CUs. */
                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
                                if (bitmap & mask) {
                                        if (counter < ao_cu_num)
                                                ao_bitmap |= mask;
                                        counter ++;
                                }
                                mask <<= 1;
                        }
                        active_cu_number += counter;
                        /* ao_cu_mask packs 8 bits per SH for the first 2 SEs x 2 SHs. */
                        if (i < 2 && j < 2)
                                ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
                        cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
                }
        }
        /* Restore broadcast SE/SH selection before releasing the mutex. */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        cu_info->number = active_cu_number;
        cu_info->ao_cu_mask = ao_cu_mask;
        cu_info->simd_per_cu = NUM_SIMD_PER_CU;
        cu_info->max_waves_per_simd = 10;
        cu_info->max_scratch_slots_per_cu = 32;
        cu_info->wave_front_size = 64;
        cu_info->lds_size = 64;
}
7090
/* IP block descriptor for GFX 8.0 ASICs. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 0,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};
7099
/* IP block descriptor for GFX 8.1 ASICs; shares the 8.0 ip_funcs. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 1,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};
7108
7109 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7110 {
7111         uint64_t ce_payload_addr;
7112         int cnt_ce;
7113         union {
7114                 struct vi_ce_ib_state regular;
7115                 struct vi_ce_ib_state_chained_ib chained;
7116         } ce_payload = {};
7117
7118         if (ring->adev->virt.chained_ib_support) {
7119                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7120                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7121                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7122         } else {
7123                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7124                         offsetof(struct vi_gfx_meta_data, ce_payload);
7125                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7126         }
7127
7128         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7129         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7130                                 WRITE_DATA_DST_SEL(8) |
7131                                 WR_CONFIRM) |
7132                                 WRITE_DATA_CACHE_POLICY(0));
7133         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7134         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7135         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7136 }
7137
7138 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7139 {
7140         uint64_t de_payload_addr, gds_addr, csa_addr;
7141         int cnt_de;
7142         union {
7143                 struct vi_de_ib_state regular;
7144                 struct vi_de_ib_state_chained_ib chained;
7145         } de_payload = {};
7146
7147         csa_addr = amdgpu_csa_vaddr(ring->adev);
7148         gds_addr = csa_addr + 4096;
7149         if (ring->adev->virt.chained_ib_support) {
7150                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7151                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7152                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7153                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7154         } else {
7155                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7156                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7157                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7158                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7159         }
7160
7161         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7162         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7163                                 WRITE_DATA_DST_SEL(8) |
7164                                 WR_CONFIRM) |
7165                                 WRITE_DATA_CACHE_POLICY(0));
7166         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7167         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7168         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7169 }