fc32586ef80b1a5c91117b5f469a17826f349408
[linux-2.6-microblaze.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "vi.h"
33 #include "vi_structs.h"
34 #include "vid.h"
35 #include "amdgpu_ucode.h"
36 #include "amdgpu_atombios.h"
37 #include "atombios_i2c.h"
38 #include "clearstate_vi.h"
39
40 #include "gmc/gmc_8_2_d.h"
41 #include "gmc/gmc_8_2_sh_mask.h"
42
43 #include "oss/oss_3_0_d.h"
44 #include "oss/oss_3_0_sh_mask.h"
45
46 #include "bif/bif_5_0_d.h"
47 #include "bif/bif_5_0_sh_mask.h"
48 #include "gca/gfx_8_0_d.h"
49 #include "gca/gfx_8_0_enum.h"
50 #include "gca/gfx_8_0_sh_mask.h"
51
52 #include "dce/dce_10_0_d.h"
53 #include "dce/dce_10_0_sh_mask.h"
54
55 #include "smu/smu_7_1_3_d.h"
56
57 #include "ivsrcid/ivsrcid_vislands30.h"
58
/*
 * Driver-local sizing constants followed by per-ASIC GB_ADDR_CONFIG "golden"
 * words.  Within this file the same words also appear as the
 * mmGB_ADDR_CONFIG rows of the iceland/cz (0x22010001), polaris11
 * (0x22011002) and tonga (0x22011003) *_golden_common_all tables.
 * NOTE(review): the users of these macros are outside this excerpt.
 */
59 #define GFX8_NUM_GFX_RINGS     1
60 #define GFX8_MEC_HPD_SIZE 4096
61
62 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
63 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
64 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
65 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
66
/*
 * Field-placement helpers: each macro shifts x left by the __SHIFT constant
 * of the correspondingly named GB_TILE_MODE0 / GB_MACROTILE_MODE0 field.
 * No masking is applied, so x must already fit within the field.
 */
67 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
68 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
69 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
70 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
71 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
72 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
73 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
74 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
75 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
76
/*
 * Single-bit masks, bit 0 (CPF) through bit 5 (GRBM), named after the
 * RLC_CGTT_MGCG_OVERRIDE register.
 */
77 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
78 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
79 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
80 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
81 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
82 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
83
84 /* BPM SERDES command codes: 1 = set, 0 = clear ("CLE" presumably abbreviates "clear") */
85 #define SET_BPM_SERDES_CMD    1
86 #define CLE_BPM_SERDES_CMD    0
87
88 /*
 * BPM register addresses.  Enumerators are consecutive starting at 0, so
 * BPM_REG_FGCG_MAX (5) equals the number of addresses listed before it.
 */
89 enum {
90         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
91         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
92         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
93         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
94         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
95         BPM_REG_FGCG_MAX
96 };
97
/*
 * Length constant for the RLC "format direct" register list (per its name).
 * NOTE(review): units and the consumer are not visible in this excerpt —
 * confirm before changing.
 */
98 #define RLC_FormatDirectRegListLength        14
99
/*
 * Firmware image names declared via MODULE_FIRMWARE(), grouped by ASIC
 * family: carrizo, stoney, tonga, topaz, fiji, polaris10/11/12 and vegam.
 * Every family lists ce, pfp, me, mec and rlc images.  stoney and topaz have
 * no mec2 entry; the polaris families additionally list "_2" variants of
 * their ce, pfp, me, mec and mec2 images.
 */
100 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
101 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
103 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
104 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
105 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
108 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
110 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
111 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
114 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
116 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
117 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
118 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
121 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
123 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
124 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
125
126 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
127 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
129 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
130 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
132
133 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
143 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
144
145 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
153 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
155 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
156
157 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
165 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
166 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
167 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
168
169 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
170 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
172 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
173 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
174 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
175
/*
 * Per-VMID GDS register table: 16 entries, index 0..15, each naming that
 * VMID's GDS BASE, GDS SIZE, GWS and OA register constants in that order.
 * NOTE(review): the field names of struct amdgpu_gds_reg_offset are declared
 * outside this excerpt; the initializers rely on positional order.
 */
176 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
177 {
178         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
179         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
180         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
181         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
182         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
183         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
184         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
185         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
186         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
187         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
188         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
189         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
190         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
191         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
192         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
193         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
194 };
195
/*
 * Tonga golden settings: a flat u32 list laid out three words per row as
 * { register constant, second word, third word }.
 * NOTE(review): the second and third words look like a bit mask and the
 * value applied under it; the consumer is outside this excerpt — confirm.
 * Some rows (e.g. mmTCC_CTRL, mmRLC_CGCG_CGLS_CTRL) carry third-word bits
 * that lie outside the second word; whether those bits take effect depends
 * on that consumer.
 */
196 static const u32 golden_settings_tonga_a11[] =
197 {
198         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
199         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
200         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
201         mmGB_GPU_ID, 0x0000000f, 0x00000000,
202         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
203         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
204         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
205         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
206         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
207         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
208         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
209         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
210         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
211         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
212         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
213         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
214 };
215
/*
 * Tonga common golden values, three u32 words per row:
 * { register constant, second word, third word }.  Every row uses
 * 0xffffffff as its second word.  The list starts with mmGRBM_GFX_INDEX
 * and its mmGB_ADDR_CONFIG word equals TONGA_GB_ADDR_CONFIG_GOLDEN.
 * NOTE(review): row order is presumably significant — confirm before
 * reordering.
 */
216 static const u32 tonga_golden_common_all[] =
217 {
218         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
219         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
220         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
221         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
222         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
223         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
224         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
225         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
226 };
227
/*
 * Tonga MGCG/CGCG init list, three u32 words per row:
 * { register constant, second word, third word }.
 * Layout: mmRLC_CGTT_MGCG_OVERRIDE first, then mmGRBM_GFX_INDEX followed by
 * the *_CLK_CTRL / *_CGTT_* rows, then mmGRBM_GFX_INDEX again followed by
 * the per-CU CGTS rows for CU0 through CU7 (CU0 and CU4 use the
 * TA_SQC_CTRL_REG name, the others TA_CTRL_REG), and finally four
 * trailing control rows.
 * NOTE(review): the repeated mmGRBM_GFX_INDEX rows suggest the list is
 * applied in order — do not sort or deduplicate without confirming.
 */
228 static const u32 tonga_mgcg_cgcg_init[] =
229 {
230         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
231         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
232         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
233         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
234         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
236         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
237         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
238         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
239         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
240         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
241         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
242         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
244         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
245         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
246         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
247         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
248         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
249         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
250         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
251         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
252         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
253         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
254         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
255         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
256         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
257         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
258         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
259         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
260         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
261         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
262         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
263         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
264         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
265         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
266         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
267         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
268         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
269         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
270         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
271         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
272         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
273         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
274         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
275         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
276         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
277         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
278         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
279         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
280         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
281         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
282         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
283         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
284         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
285         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
286         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
287         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
288         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
289         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
290         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
291         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
292         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
293         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
294         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
295         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
296         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
297         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
298         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
299         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
300         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
301         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
302         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
303         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
304         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
305 };
306
/*
 * VegaM golden settings, three u32 words per row:
 * { register constant, second word, third word }.
 * NOTE(review): the second/third words look like a mask and the value
 * applied under it; the consumer is outside this excerpt — confirm.
 */
307 static const u32 golden_settings_vegam_a11[] =
308 {
309         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
310         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
311         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
312         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
313         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
314         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
315         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
316         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
317         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
318         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
319         mmSQ_CONFIG, 0x07f80000, 0x01180000,
320         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
321         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
322         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
323         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
324         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
325         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
326 };
327
/*
 * VegaM common golden values, three u32 words per row:
 * { register constant, second word, third word }, every second word being
 * 0xffffffff.  Unlike the tonga/polaris10/fiji common lists in this file,
 * this one has no mmPA_SC_RASTER_CONFIG rows (those registers appear in
 * golden_settings_vegam_a11 instead).
 */
328 static const u32 vegam_golden_common_all[] =
329 {
330         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
331         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
332         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
333         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
334         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
335         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
336 };
337
/*
 * Polaris11 golden settings, three u32 words per row:
 * { register constant, second word, third word }.
 * NOTE(review): the second/third words look like a mask and the value
 * applied under it; the consumer is outside this excerpt — confirm.
 */
338 static const u32 golden_settings_polaris11_a11[] =
339 {
340         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
341         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
342         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
343         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
344         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
345         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
346         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
347         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
348         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
349         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
350         mmSQ_CONFIG, 0x07f80000, 0x01180000,
351         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
352         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
353         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
354         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
355         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
356         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
357 };
358
/*
 * Polaris11 common golden values, three u32 words per row:
 * { register constant, second word, third word }, every second word being
 * 0xffffffff.  The mmGB_ADDR_CONFIG word equals
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN; there are no mmPA_SC_RASTER_CONFIG rows
 * here (those registers appear in golden_settings_polaris11_a11).
 */
359 static const u32 polaris11_golden_common_all[] =
360 {
361         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
362         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
363         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
364         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
365         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
366         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
367 };
368
/*
 * Polaris10 golden settings, three u32 words per row:
 * { register constant, second word, third word }.  Compared with the
 * polaris11 and vegam lists in this file, this one adds an mmATC_MISC_CG
 * row and has no mmTCP_CHAN_STEER_LO row.
 * NOTE(review): the second/third words look like a mask and the value
 * applied under it; the consumer is outside this excerpt — confirm.
 */
369 static const u32 golden_settings_polaris10_a11[] =
370 {
371         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
372         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
373         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
374         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
375         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
376         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
377         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
378         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
379         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
380         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
381         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
382         mmSQ_CONFIG, 0x07f80000, 0x07180000,
383         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
384         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
385         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
386         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
387         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
388 };
389
/*
 * Polaris10 common golden values, three u32 words per row:
 * { register constant, second word, third word }, every second word being
 * 0xffffffff.  The rows carry the same registers and words as
 * tonga_golden_common_all.
 */
390 static const u32 polaris10_golden_common_all[] =
391 {
392         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
393         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
394         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
395         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
396         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
397         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
398         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
399         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
400 };
401
/*
 * Fiji common golden values, three u32 words per row:
 * { register constant, second word, third word }.  After the eight rows
 * shared in shape with the other common lists, mmGRBM_GFX_INDEX is listed a
 * second time ahead of an mmSPI_CONFIG_CNTL_1 row.
 * NOTE(review): the repeated mmGRBM_GFX_INDEX row suggests the list is
 * applied in order — confirm before reordering.
 */
402 static const u32 fiji_golden_common_all[] =
403 {
404         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
405         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
406         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
407         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
408         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
409         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
410         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
411         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
412         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
413         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
414 };
415
/*
 * Fiji golden settings, three u32 words per row:
 * { register constant, second word, third word }.
 * NOTE(review): the second/third words look like a mask and the value
 * applied under it; the consumer is outside this excerpt — confirm.
 */
416 static const u32 golden_settings_fiji_a10[] =
417 {
418         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
419         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
420         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
421         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
422         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
423         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
424         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
425         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
426         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
427         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
428         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
429 };
430
/*
 * Fiji MGCG/CGCG init list, three u32 words per row:
 * { register constant, second word, third word }.
 * Layout: mmRLC_CGTT_MGCG_OVERRIDE first, then mmGRBM_GFX_INDEX followed by
 * the *_CLK_CTRL / *_CGTT_* rows, then mmGRBM_GFX_INDEX again followed
 * directly by the trailing control rows.  Unlike the tonga and iceland
 * lists in this file there are no per-CU mmCGTS_CUx_* rows.
 * NOTE(review): the repeated mmGRBM_GFX_INDEX rows suggest the list is
 * applied in order — do not sort or deduplicate without confirming.
 */
431 static const u32 fiji_mgcg_cgcg_init[] =
432 {
433         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
434         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
435         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
436         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
440         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
442         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
444         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
452         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
453         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
455         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
456         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
458         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
459         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
460         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
461         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
462         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
463         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
464         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
465         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
466         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
467         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
468 };
469
/*
 * Iceland golden settings, three u32 words per row:
 * { register constant, second word, third word }.
 * NOTE(review): the second/third words look like a mask and the value
 * applied under it; the consumer is outside this excerpt — confirm.
 */
470 static const u32 golden_settings_iceland_a11[] =
471 {
472         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
473         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
474         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
475         mmGB_GPU_ID, 0x0000000f, 0x00000000,
476         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
477         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
478         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
479         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
480         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
481         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
482         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
483         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
484         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
485         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
486         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
487         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
488 };
489
/*
 * Iceland common golden values, three u32 words per row:
 * { register constant, second word, third word }, every second word being
 * 0xffffffff.  The mmGB_ADDR_CONFIG word (0x22010001) has the same value as
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN and CARRIZO_GB_ADDR_CONFIG_GOLDEN.
 */
490 static const u32 iceland_golden_common_all[] =
491 {
492         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
493         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
494         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
495         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
496         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
497         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
498         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
499         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
500 };
501
/*
 * Iceland MGCG/CGCG init list, three u32 words per row:
 * { register constant, second word, third word }.
 * Layout: mmRLC_CGTT_MGCG_OVERRIDE first, then mmGRBM_GFX_INDEX followed by
 * the *_CLK_CTRL / *_CGTT_* rows, then mmGRBM_GFX_INDEX again followed by
 * the per-CU CGTS rows for CU0 through CU5, and three trailing control
 * rows.  Differences from tonga_mgcg_cgcg_init visible in this file: the
 * CP/CPC/CPF and TCI clock-control words, the CU0/CU4 TA_SQC words, no
 * CU6/CU7 rows, and no final mmCP_MEM_SLP_CNTL row.
 * NOTE(review): the repeated mmGRBM_GFX_INDEX rows suggest the list is
 * applied in order — do not sort or deduplicate without confirming.
 */
502 static const u32 iceland_mgcg_cgcg_init[] =
503 {
504         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
505         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
506         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
507         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
508         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
509         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
510         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
511         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
512         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
513         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
514         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
515         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
516         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
518         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
519         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
520         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
521         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
522         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
523         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
524         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
525         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
526         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
527         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
528         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
529         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
530         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
531         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
532         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
533         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
534         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
535         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
536         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
537         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
538         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
539         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
540         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
541         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
542         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
543         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
544         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
545         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
546         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
547         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
548         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
549         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
550         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
551         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
552         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
553         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
554         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
555         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
558         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
561         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
562         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
563         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
564         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
565         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
566         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
567         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
568 };
569
/*
 * CZ golden settings, three u32 words per row:
 * { register constant, second word, third word }.
 * NOTE(review): the second/third words look like a mask and the value
 * applied under it; the consumer is outside this excerpt — confirm.
 * The mmTCP_ADDR_CONFIG row pairs second word 0x0000000f with third word
 * 0x000000f3, i.e. the third word has bits outside the second; verify
 * against the consumer that this is intended.
 */
570 static const u32 cz_golden_settings_a11[] =
571 {
572         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
573         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
574         mmGB_GPU_ID, 0x0000000f, 0x00000000,
575         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
576         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
577         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
578         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
579         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
580         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
581         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
582         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
583         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
584 };
585
/*
 * CZ common golden values, three u32 words per row:
 * { register constant, second word, third word }, every second word being
 * 0xffffffff.  The rows carry the same registers and words as
 * iceland_golden_common_all; the mmGB_ADDR_CONFIG word equals
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN.
 */
586 static const u32 cz_golden_common_all[] =
587 {
588         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
589         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
590         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
591         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
592         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
593         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
594         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
595         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
596 };
597
598 static const u32 cz_mgcg_cgcg_init[] =
599 {
600         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
601         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
602         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
606         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
608         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
609         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
610         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
611         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
615         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
616         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
617         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
619         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
620         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
621         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
622         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
623         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
624         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
625         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
626         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
627         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
628         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
629         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
630         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
631         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
632         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
633         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
634         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
635         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
636         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
637         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
638         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
639         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
640         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
641         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
642         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
643         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
644         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
645         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
646         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
647         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
648         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
649         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
650         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
651         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
652         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
653         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
654         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
655         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
656         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
657         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
658         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
659         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
660         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
661         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
662         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
663         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
664         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
665         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
666         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
667         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
668         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
669         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
670         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
671         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
672         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
673         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
674         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
675 };
676
677 static const u32 stoney_golden_settings_a11[] =
678 {
679         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
680         mmGB_GPU_ID, 0x0000000f, 0x00000000,
681         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
682         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
683         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
684         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
685         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
686         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
687         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
688         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
689 };
690
691 static const u32 stoney_golden_common_all[] =
692 {
693         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
694         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
695         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
696         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
697         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
698         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
699         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
700         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
701 };
702
703 static const u32 stoney_mgcg_cgcg_init[] =
704 {
705         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
706         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
707         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
708         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
709         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
710 };
711
712
/*
 * Human-readable descriptions of the SQ EDC error sources.  The array index
 * selects the message; indices are spelled out so that the mapping is
 * explicit.
 */
static const char * const sq_edc_source_names[] = {
	[0] = "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	[1] = "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	[2] = "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	[3] = "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	[4] = "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	[5] = "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	[6] = "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
722
723 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
724 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
725 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
726 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
727 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
728 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
729 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
730 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
731
732 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
733 {
734         switch (adev->asic_type) {
735         case CHIP_TOPAZ:
736                 amdgpu_device_program_register_sequence(adev,
737                                                         iceland_mgcg_cgcg_init,
738                                                         ARRAY_SIZE(iceland_mgcg_cgcg_init));
739                 amdgpu_device_program_register_sequence(adev,
740                                                         golden_settings_iceland_a11,
741                                                         ARRAY_SIZE(golden_settings_iceland_a11));
742                 amdgpu_device_program_register_sequence(adev,
743                                                         iceland_golden_common_all,
744                                                         ARRAY_SIZE(iceland_golden_common_all));
745                 break;
746         case CHIP_FIJI:
747                 amdgpu_device_program_register_sequence(adev,
748                                                         fiji_mgcg_cgcg_init,
749                                                         ARRAY_SIZE(fiji_mgcg_cgcg_init));
750                 amdgpu_device_program_register_sequence(adev,
751                                                         golden_settings_fiji_a10,
752                                                         ARRAY_SIZE(golden_settings_fiji_a10));
753                 amdgpu_device_program_register_sequence(adev,
754                                                         fiji_golden_common_all,
755                                                         ARRAY_SIZE(fiji_golden_common_all));
756                 break;
757
758         case CHIP_TONGA:
759                 amdgpu_device_program_register_sequence(adev,
760                                                         tonga_mgcg_cgcg_init,
761                                                         ARRAY_SIZE(tonga_mgcg_cgcg_init));
762                 amdgpu_device_program_register_sequence(adev,
763                                                         golden_settings_tonga_a11,
764                                                         ARRAY_SIZE(golden_settings_tonga_a11));
765                 amdgpu_device_program_register_sequence(adev,
766                                                         tonga_golden_common_all,
767                                                         ARRAY_SIZE(tonga_golden_common_all));
768                 break;
769         case CHIP_VEGAM:
770                 amdgpu_device_program_register_sequence(adev,
771                                                         golden_settings_vegam_a11,
772                                                         ARRAY_SIZE(golden_settings_vegam_a11));
773                 amdgpu_device_program_register_sequence(adev,
774                                                         vegam_golden_common_all,
775                                                         ARRAY_SIZE(vegam_golden_common_all));
776                 break;
777         case CHIP_POLARIS11:
778         case CHIP_POLARIS12:
779                 amdgpu_device_program_register_sequence(adev,
780                                                         golden_settings_polaris11_a11,
781                                                         ARRAY_SIZE(golden_settings_polaris11_a11));
782                 amdgpu_device_program_register_sequence(adev,
783                                                         polaris11_golden_common_all,
784                                                         ARRAY_SIZE(polaris11_golden_common_all));
785                 break;
786         case CHIP_POLARIS10:
787                 amdgpu_device_program_register_sequence(adev,
788                                                         golden_settings_polaris10_a11,
789                                                         ARRAY_SIZE(golden_settings_polaris10_a11));
790                 amdgpu_device_program_register_sequence(adev,
791                                                         polaris10_golden_common_all,
792                                                         ARRAY_SIZE(polaris10_golden_common_all));
793                 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
794                 if (adev->pdev->revision == 0xc7 &&
795                     ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
796                      (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
797                      (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
798                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
799                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
800                 }
801                 break;
802         case CHIP_CARRIZO:
803                 amdgpu_device_program_register_sequence(adev,
804                                                         cz_mgcg_cgcg_init,
805                                                         ARRAY_SIZE(cz_mgcg_cgcg_init));
806                 amdgpu_device_program_register_sequence(adev,
807                                                         cz_golden_settings_a11,
808                                                         ARRAY_SIZE(cz_golden_settings_a11));
809                 amdgpu_device_program_register_sequence(adev,
810                                                         cz_golden_common_all,
811                                                         ARRAY_SIZE(cz_golden_common_all));
812                 break;
813         case CHIP_STONEY:
814                 amdgpu_device_program_register_sequence(adev,
815                                                         stoney_mgcg_cgcg_init,
816                                                         ARRAY_SIZE(stoney_mgcg_cgcg_init));
817                 amdgpu_device_program_register_sequence(adev,
818                                                         stoney_golden_settings_a11,
819                                                         ARRAY_SIZE(stoney_golden_settings_a11));
820                 amdgpu_device_program_register_sequence(adev,
821                                                         stoney_golden_common_all,
822                                                         ARRAY_SIZE(stoney_golden_common_all));
823                 break;
824         default:
825                 break;
826         }
827 }
828
829 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
830 {
831         adev->gfx.scratch.num_reg = 8;
832         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
833         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
834 }
835
836 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
837 {
838         struct amdgpu_device *adev = ring->adev;
839         uint32_t scratch;
840         uint32_t tmp = 0;
841         unsigned i;
842         int r;
843
844         r = amdgpu_gfx_scratch_get(adev, &scratch);
845         if (r)
846                 return r;
847
848         WREG32(scratch, 0xCAFEDEAD);
849         r = amdgpu_ring_alloc(ring, 3);
850         if (r)
851                 goto error_free_scratch;
852
853         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
854         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
855         amdgpu_ring_write(ring, 0xDEADBEEF);
856         amdgpu_ring_commit(ring);
857
858         for (i = 0; i < adev->usec_timeout; i++) {
859                 tmp = RREG32(scratch);
860                 if (tmp == 0xDEADBEEF)
861                         break;
862                 udelay(1);
863         }
864
865         if (i >= adev->usec_timeout)
866                 r = -ETIMEDOUT;
867
868 error_free_scratch:
869         amdgpu_gfx_scratch_free(adev, scratch);
870         return r;
871 }
872
873 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
874 {
875         struct amdgpu_device *adev = ring->adev;
876         struct amdgpu_ib ib;
877         struct dma_fence *f = NULL;
878
879         unsigned int index;
880         uint64_t gpu_addr;
881         uint32_t tmp;
882         long r;
883
884         r = amdgpu_device_wb_get(adev, &index);
885         if (r)
886                 return r;
887
888         gpu_addr = adev->wb.gpu_addr + (index * 4);
889         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
890         memset(&ib, 0, sizeof(ib));
891         r = amdgpu_ib_get(adev, NULL, 16, &ib);
892         if (r)
893                 goto err1;
894
895         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
896         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
897         ib.ptr[2] = lower_32_bits(gpu_addr);
898         ib.ptr[3] = upper_32_bits(gpu_addr);
899         ib.ptr[4] = 0xDEADBEEF;
900         ib.length_dw = 5;
901
902         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
903         if (r)
904                 goto err2;
905
906         r = dma_fence_wait_timeout(f, false, timeout);
907         if (r == 0) {
908                 r = -ETIMEDOUT;
909                 goto err2;
910         } else if (r < 0) {
911                 goto err2;
912         }
913
914         tmp = adev->wb.wb[index];
915         if (tmp == 0xDEADBEEF)
916                 r = 0;
917         else
918                 r = -EINVAL;
919
920 err2:
921         amdgpu_ib_free(adev, &ib, NULL);
922         dma_fence_put(f);
923 err1:
924         amdgpu_device_wb_free(adev, index);
925         return r;
926 }
927
928
929 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
930 {
931         release_firmware(adev->gfx.pfp_fw);
932         adev->gfx.pfp_fw = NULL;
933         release_firmware(adev->gfx.me_fw);
934         adev->gfx.me_fw = NULL;
935         release_firmware(adev->gfx.ce_fw);
936         adev->gfx.ce_fw = NULL;
937         release_firmware(adev->gfx.rlc_fw);
938         adev->gfx.rlc_fw = NULL;
939         release_firmware(adev->gfx.mec_fw);
940         adev->gfx.mec_fw = NULL;
941         if ((adev->asic_type != CHIP_STONEY) &&
942             (adev->asic_type != CHIP_TOPAZ))
943                 release_firmware(adev->gfx.mec2_fw);
944         adev->gfx.mec2_fw = NULL;
945
946         kfree(adev->gfx.rlc.register_list_format);
947 }
948
949 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
950 {
951         const char *chip_name;
952         char fw_name[30];
953         int err;
954         struct amdgpu_firmware_info *info = NULL;
955         const struct common_firmware_header *header = NULL;
956         const struct gfx_firmware_header_v1_0 *cp_hdr;
957         const struct rlc_firmware_header_v2_0 *rlc_hdr;
958         unsigned int *tmp = NULL, i;
959
960         DRM_DEBUG("\n");
961
962         switch (adev->asic_type) {
963         case CHIP_TOPAZ:
964                 chip_name = "topaz";
965                 break;
966         case CHIP_TONGA:
967                 chip_name = "tonga";
968                 break;
969         case CHIP_CARRIZO:
970                 chip_name = "carrizo";
971                 break;
972         case CHIP_FIJI:
973                 chip_name = "fiji";
974                 break;
975         case CHIP_STONEY:
976                 chip_name = "stoney";
977                 break;
978         case CHIP_POLARIS10:
979                 chip_name = "polaris10";
980                 break;
981         case CHIP_POLARIS11:
982                 chip_name = "polaris11";
983                 break;
984         case CHIP_POLARIS12:
985                 chip_name = "polaris12";
986                 break;
987         case CHIP_VEGAM:
988                 chip_name = "vegam";
989                 break;
990         default:
991                 BUG();
992         }
993
994         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
995                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
996                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
997                 if (err == -ENOENT) {
998                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
999                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1000                 }
1001         } else {
1002                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1003                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1004         }
1005         if (err)
1006                 goto out;
1007         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1008         if (err)
1009                 goto out;
1010         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1011         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1012         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1013
1014         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1015                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1016                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1017                 if (err == -ENOENT) {
1018                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1019                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1020                 }
1021         } else {
1022                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1023                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1024         }
1025         if (err)
1026                 goto out;
1027         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1028         if (err)
1029                 goto out;
1030         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1031         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1032
1033         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1034
1035         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1036                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1037                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1038                 if (err == -ENOENT) {
1039                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1040                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1041                 }
1042         } else {
1043                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1044                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1045         }
1046         if (err)
1047                 goto out;
1048         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1049         if (err)
1050                 goto out;
1051         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1052         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1053         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1054
1055         /*
1056          * Support for MCBP/Virtualization in combination with chained IBs is
1057          * formal released on feature version #46
1058          */
1059         if (adev->gfx.ce_feature_version >= 46 &&
1060             adev->gfx.pfp_feature_version >= 46) {
1061                 adev->virt.chained_ib_support = true;
1062                 DRM_INFO("Chained IB support enabled!\n");
1063         } else
1064                 adev->virt.chained_ib_support = false;
1065
1066         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1067         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1068         if (err)
1069                 goto out;
1070         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1071         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1072         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1073         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1074
1075         adev->gfx.rlc.save_and_restore_offset =
1076                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1077         adev->gfx.rlc.clear_state_descriptor_offset =
1078                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1079         adev->gfx.rlc.avail_scratch_ram_locations =
1080                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1081         adev->gfx.rlc.reg_restore_list_size =
1082                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1083         adev->gfx.rlc.reg_list_format_start =
1084                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1085         adev->gfx.rlc.reg_list_format_separate_start =
1086                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1087         adev->gfx.rlc.starting_offsets_start =
1088                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1089         adev->gfx.rlc.reg_list_format_size_bytes =
1090                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1091         adev->gfx.rlc.reg_list_size_bytes =
1092                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1093
1094         adev->gfx.rlc.register_list_format =
1095                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1096                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1097
1098         if (!adev->gfx.rlc.register_list_format) {
1099                 err = -ENOMEM;
1100                 goto out;
1101         }
1102
1103         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1104                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1105         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1106                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1107
1108         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1109
1110         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1111                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1112         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1113                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1114
1115         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1116                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1117                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1118                 if (err == -ENOENT) {
1119                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1120                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1121                 }
1122         } else {
1123                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1124                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1125         }
1126         if (err)
1127                 goto out;
1128         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1129         if (err)
1130                 goto out;
1131         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1132         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1133         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1134
1135         if ((adev->asic_type != CHIP_STONEY) &&
1136             (adev->asic_type != CHIP_TOPAZ)) {
1137                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1138                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1139                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1140                         if (err == -ENOENT) {
1141                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1142                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1143                         }
1144                 } else {
1145                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1146                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1147                 }
1148                 if (!err) {
1149                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1150                         if (err)
1151                                 goto out;
1152                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1153                                 adev->gfx.mec2_fw->data;
1154                         adev->gfx.mec2_fw_version =
1155                                 le32_to_cpu(cp_hdr->header.ucode_version);
1156                         adev->gfx.mec2_feature_version =
1157                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1158                 } else {
1159                         err = 0;
1160                         adev->gfx.mec2_fw = NULL;
1161                 }
1162         }
1163
1164         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1165         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1166         info->fw = adev->gfx.pfp_fw;
1167         header = (const struct common_firmware_header *)info->fw->data;
1168         adev->firmware.fw_size +=
1169                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1170
1171         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1172         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1173         info->fw = adev->gfx.me_fw;
1174         header = (const struct common_firmware_header *)info->fw->data;
1175         adev->firmware.fw_size +=
1176                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1177
1178         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1179         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1180         info->fw = adev->gfx.ce_fw;
1181         header = (const struct common_firmware_header *)info->fw->data;
1182         adev->firmware.fw_size +=
1183                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1184
1185         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1186         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1187         info->fw = adev->gfx.rlc_fw;
1188         header = (const struct common_firmware_header *)info->fw->data;
1189         adev->firmware.fw_size +=
1190                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1191
1192         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1193         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1194         info->fw = adev->gfx.mec_fw;
1195         header = (const struct common_firmware_header *)info->fw->data;
1196         adev->firmware.fw_size +=
1197                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1198
1199         /* we need account JT in */
1200         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1201         adev->firmware.fw_size +=
1202                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1203
1204         if (amdgpu_sriov_vf(adev)) {
1205                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1206                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1207                 info->fw = adev->gfx.mec_fw;
1208                 adev->firmware.fw_size +=
1209                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1210         }
1211
1212         if (adev->gfx.mec2_fw) {
1213                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1214                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1215                 info->fw = adev->gfx.mec2_fw;
1216                 header = (const struct common_firmware_header *)info->fw->data;
1217                 adev->firmware.fw_size +=
1218                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1219         }
1220
1221 out:
1222         if (err) {
1223                 dev_err(adev->dev,
1224                         "gfx8: Failed to load firmware \"%s\"\n",
1225                         fw_name);
1226                 release_firmware(adev->gfx.pfp_fw);
1227                 adev->gfx.pfp_fw = NULL;
1228                 release_firmware(adev->gfx.me_fw);
1229                 adev->gfx.me_fw = NULL;
1230                 release_firmware(adev->gfx.ce_fw);
1231                 adev->gfx.ce_fw = NULL;
1232                 release_firmware(adev->gfx.rlc_fw);
1233                 adev->gfx.rlc_fw = NULL;
1234                 release_firmware(adev->gfx.mec_fw);
1235                 adev->gfx.mec_fw = NULL;
1236                 release_firmware(adev->gfx.mec2_fw);
1237                 adev->gfx.mec2_fw = NULL;
1238         }
1239         return err;
1240 }
1241
1242 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1243                                     volatile u32 *buffer)
1244 {
1245         u32 count = 0, i;
1246         const struct cs_section_def *sect = NULL;
1247         const struct cs_extent_def *ext = NULL;
1248
1249         if (adev->gfx.rlc.cs_data == NULL)
1250                 return;
1251         if (buffer == NULL)
1252                 return;
1253
1254         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1255         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1256
1257         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1258         buffer[count++] = cpu_to_le32(0x80000000);
1259         buffer[count++] = cpu_to_le32(0x80000000);
1260
1261         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1262                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1263                         if (sect->id == SECT_CONTEXT) {
1264                                 buffer[count++] =
1265                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1266                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1267                                                 PACKET3_SET_CONTEXT_REG_START);
1268                                 for (i = 0; i < ext->reg_count; i++)
1269                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1270                         } else {
1271                                 return;
1272                         }
1273                 }
1274         }
1275
1276         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1277         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1278                         PACKET3_SET_CONTEXT_REG_START);
1279         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1280         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1281
1282         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1283         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1284
1285         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1286         buffer[count++] = cpu_to_le32(0);
1287 }
1288
1289 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1290 {
1291         if (adev->asic_type == CHIP_CARRIZO)
1292                 return 5;
1293         else
1294                 return 4;
1295 }
1296
1297 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1298 {
1299         const struct cs_section_def *cs_data;
1300         int r;
1301
1302         adev->gfx.rlc.cs_data = vi_cs_data;
1303
1304         cs_data = adev->gfx.rlc.cs_data;
1305
1306         if (cs_data) {
1307                 /* init clear state block */
1308                 r = amdgpu_gfx_rlc_init_csb(adev);
1309                 if (r)
1310                         return r;
1311         }
1312
1313         if ((adev->asic_type == CHIP_CARRIZO) ||
1314             (adev->asic_type == CHIP_STONEY)) {
1315                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1316                 r = amdgpu_gfx_rlc_init_cpt(adev);
1317                 if (r)
1318                         return r;
1319         }
1320
1321         /* init spm vmid with 0xf */
1322         if (adev->gfx.rlc.funcs->update_spm_vmid)
1323                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1324
1325         return 0;
1326 }
1327
1328 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1329 {
1330         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1331 }
1332
1333 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1334 {
1335         int r;
1336         u32 *hpd;
1337         size_t mec_hpd_size;
1338
1339         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1340
1341         /* take ownership of the relevant compute queues */
1342         amdgpu_gfx_compute_queue_acquire(adev);
1343
1344         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1345
1346         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1347                                       AMDGPU_GEM_DOMAIN_VRAM,
1348                                       &adev->gfx.mec.hpd_eop_obj,
1349                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1350                                       (void **)&hpd);
1351         if (r) {
1352                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1353                 return r;
1354         }
1355
1356         memset(hpd, 0, mec_hpd_size);
1357
1358         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1359         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1360
1361         return 0;
1362 }
1363
/*
 * Raw 32-bit words of the compute shader used for the VGPR pass of
 * gfx_v8_0_do_edc_gpr_workarounds().  That function copies the words
 * verbatim into its indirect buffer and points mmCOMPUTE_PGM_LO/HI at the
 * copy before issuing the first dispatch.
 * NOTE(review): the words are presumably GCN3 machine code that writes
 * every VGPR; the encoding is not decoded here.
 */
1364 static const u32 vgpr_init_compute_shader[] =
1365 {
1366         0x7e000209, 0x7e020208,
1367         0x7e040207, 0x7e060206,
1368         0x7e080205, 0x7e0a0204,
1369         0x7e0c0203, 0x7e0e0202,
1370         0x7e100201, 0x7e120200,
1371         0x7e140209, 0x7e160208,
1372         0x7e180207, 0x7e1a0206,
1373         0x7e1c0205, 0x7e1e0204,
1374         0x7e200203, 0x7e220202,
1375         0x7e240201, 0x7e260200,
1376         0x7e280209, 0x7e2a0208,
1377         0x7e2c0207, 0x7e2e0206,
1378         0x7e300205, 0x7e320204,
1379         0x7e340203, 0x7e360202,
1380         0x7e380201, 0x7e3a0200,
1381         0x7e3c0209, 0x7e3e0208,
1382         0x7e400207, 0x7e420206,
1383         0x7e440205, 0x7e460204,
1384         0x7e480203, 0x7e4a0202,
1385         0x7e4c0201, 0x7e4e0200,
1386         0x7e500209, 0x7e520208,
1387         0x7e540207, 0x7e560206,
1388         0x7e580205, 0x7e5a0204,
1389         0x7e5c0203, 0x7e5e0202,
1390         0x7e600201, 0x7e620200,
1391         0x7e640209, 0x7e660208,
1392         0x7e680207, 0x7e6a0206,
1393         0x7e6c0205, 0x7e6e0204,
1394         0x7e700203, 0x7e720202,
1395         0x7e740201, 0x7e760200,
1396         0x7e780209, 0x7e7a0208,
1397         0x7e7c0207, 0x7e7e0206,
1398         0xbf8a0000, 0xbf810000,
1399 };
1400
/*
 * Raw 32-bit words of the compute shader used for both SGPR passes
 * (SGPR1 and SGPR2) of gfx_v8_0_do_edc_gpr_workarounds().  It is copied
 * once into the indirect buffer and both SGPR dispatches point
 * mmCOMPUTE_PGM_LO/HI at that single copy.
 * NOTE(review): presumably GCN3 machine code that writes the SGPRs; the
 * encoding is not decoded here.
 */
1401 static const u32 sgpr_init_compute_shader[] =
1402 {
1403         0xbe8a0100, 0xbe8c0102,
1404         0xbe8e0104, 0xbe900106,
1405         0xbe920108, 0xbe940100,
1406         0xbe960102, 0xbe980104,
1407         0xbe9a0106, 0xbe9c0108,
1408         0xbe9e0100, 0xbea00102,
1409         0xbea20104, 0xbea40106,
1410         0xbea60108, 0xbea80100,
1411         0xbeaa0102, 0xbeac0104,
1412         0xbeae0106, 0xbeb00108,
1413         0xbeb20100, 0xbeb40102,
1414         0xbeb60104, 0xbeb80106,
1415         0xbeba0108, 0xbebc0100,
1416         0xbebe0102, 0xbec00104,
1417         0xbec20106, 0xbec40108,
1418         0xbec60100, 0xbec80102,
1419         0xbee60004, 0xbee70005,
1420         0xbeea0006, 0xbeeb0007,
1421         0xbee80008, 0xbee90009,
1422         0xbefc0000, 0xbf8a0000,
1423         0xbf810000, 0x00000000,
1424 };
1425
/*
 * Register state for the VGPR dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  The table is a flat list of
 * (register, value) pairs; the consumer walks it two entries at a time and
 * emits one PACKET3_SET_SH_REG per pair, so the element count must stay
 * even.
 */
1426 static const u32 vgpr_init_regs[] =
1427 {
1428         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1429         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1430         mmCOMPUTE_NUM_THREAD_X, 256*4,
1431         mmCOMPUTE_NUM_THREAD_Y, 1,
1432         mmCOMPUTE_NUM_THREAD_Z, 1,
1433         mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1434         mmCOMPUTE_PGM_RSRC2, 20,
1435         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1436         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1437         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1438         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1439         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1440         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1441         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1442         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1443         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1444         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1445 };
1446
/*
 * Register state for the first SGPR dispatch (SGPR1) in
 * gfx_v8_0_do_edc_gpr_workarounds(): a flat list of (register, value)
 * pairs, each emitted as one PACKET3_SET_SH_REG.  Identical to
 * sgpr2_init_regs except for the mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value
 * (0x0f here, 0xf0 there).
 * NOTE(review): the two masks presumably steer the two passes to
 * different CUs - confirm against the register spec.
 */
1447 static const u32 sgpr1_init_regs[] =
1448 {
1449         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1450         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1451         mmCOMPUTE_NUM_THREAD_X, 256*5,
1452         mmCOMPUTE_NUM_THREAD_Y, 1,
1453         mmCOMPUTE_NUM_THREAD_Z, 1,
1454         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1455         mmCOMPUTE_PGM_RSRC2, 20,
1456         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1457         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1458         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1459         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1460         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1461         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1462         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1463         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1464         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1465         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1466 };
1467
/*
 * Register state for the second SGPR dispatch (SGPR2) in
 * gfx_v8_0_do_edc_gpr_workarounds(): a flat list of (register, value)
 * pairs, each emitted as one PACKET3_SET_SH_REG.  Identical to
 * sgpr1_init_regs except for the mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value
 * (0xf0 here, 0x0f there).
 */
1468 static const u32 sgpr2_init_regs[] =
1469 {
1470         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1471         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1472         mmCOMPUTE_NUM_THREAD_X, 256*5,
1473         mmCOMPUTE_NUM_THREAD_Y, 1,
1474         mmCOMPUTE_NUM_THREAD_Z, 1,
1475         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1476         mmCOMPUTE_PGM_RSRC2, 20,
1477         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1478         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1479         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1480         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1481         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1482         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1483         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1484         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1485         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1486         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1487 };
1488
/*
 * EDC counter registers that gfx_v8_0_do_edc_gpr_workarounds() reads back
 * (discarding the values) once EDC has been re-enabled; per the comment at
 * that call site the read-back is what clears the counters.
 */
1489 static const u32 sec_ded_counter_registers[] =
1490 {
1491         mmCPC_EDC_ATC_CNT,
1492         mmCPC_EDC_SCRATCH_CNT,
1493         mmCPC_EDC_UCODE_CNT,
1494         mmCPF_EDC_ATC_CNT,
1495         mmCPF_EDC_ROQ_CNT,
1496         mmCPF_EDC_TAG_CNT,
1497         mmCPG_EDC_ATC_CNT,
1498         mmCPG_EDC_DMA_CNT,
1499         mmCPG_EDC_TAG_CNT,
1500         mmDC_EDC_CSINVOC_CNT,
1501         mmDC_EDC_RESTORE_CNT,
1502         mmDC_EDC_STATE_CNT,
1503         mmGDS_EDC_CNT,
1504         mmGDS_EDC_GRBM_CNT,
1505         mmGDS_EDC_OA_DED,
1506         mmSPI_EDC_CNT,
1507         mmSQC_ATC_EDC_GATCL1_CNT,
1508         mmSQC_EDC_CNT,
1509         mmSQ_EDC_DED_CNT,
1510         mmSQ_EDC_INFO,
1511         mmSQ_EDC_SEC_CNT,
1512         mmTCC_EDC_CNT,
1513         mmTCP_ATC_EDC_GATCL1_CNT,
1514         mmTCP_EDC_CNT,
1515         mmTD_EDC_CNT
1516 };
1517
1518 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1519 {
1520         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1521         struct amdgpu_ib ib;
1522         struct dma_fence *f = NULL;
1523         int r, i;
1524         u32 tmp;
1525         unsigned total_size, vgpr_offset, sgpr_offset;
1526         u64 gpu_addr;
1527
1528         /* only supported on CZ */
1529         if (adev->asic_type != CHIP_CARRIZO)
1530                 return 0;
1531
1532         /* bail if the compute ring is not ready */
1533         if (!ring->sched.ready)
1534                 return 0;
1535
1536         tmp = RREG32(mmGB_EDC_MODE);
1537         WREG32(mmGB_EDC_MODE, 0);
1538
1539         total_size =
1540                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1541         total_size +=
1542                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1543         total_size +=
1544                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1545         total_size = ALIGN(total_size, 256);
1546         vgpr_offset = total_size;
1547         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1548         sgpr_offset = total_size;
1549         total_size += sizeof(sgpr_init_compute_shader);
1550
1551         /* allocate an indirect buffer to put the commands in */
1552         memset(&ib, 0, sizeof(ib));
1553         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1554         if (r) {
1555                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1556                 return r;
1557         }
1558
1559         /* load the compute shaders */
1560         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1561                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1562
1563         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1564                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1565
1566         /* init the ib length to 0 */
1567         ib.length_dw = 0;
1568
1569         /* VGPR */
1570         /* write the register state for the compute dispatch */
1571         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1572                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1573                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1574                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1575         }
1576         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1577         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1578         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1579         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1580         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1581         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1582
1583         /* write dispatch packet */
1584         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1585         ib.ptr[ib.length_dw++] = 8; /* x */
1586         ib.ptr[ib.length_dw++] = 1; /* y */
1587         ib.ptr[ib.length_dw++] = 1; /* z */
1588         ib.ptr[ib.length_dw++] =
1589                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1590
1591         /* write CS partial flush packet */
1592         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1593         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1594
1595         /* SGPR1 */
1596         /* write the register state for the compute dispatch */
1597         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1598                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1599                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1600                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1601         }
1602         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1603         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1604         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1605         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1606         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1607         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1608
1609         /* write dispatch packet */
1610         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1611         ib.ptr[ib.length_dw++] = 8; /* x */
1612         ib.ptr[ib.length_dw++] = 1; /* y */
1613         ib.ptr[ib.length_dw++] = 1; /* z */
1614         ib.ptr[ib.length_dw++] =
1615                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1616
1617         /* write CS partial flush packet */
1618         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1619         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1620
1621         /* SGPR2 */
1622         /* write the register state for the compute dispatch */
1623         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1624                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1625                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1626                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1627         }
1628         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1629         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1630         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1631         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1632         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1633         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1634
1635         /* write dispatch packet */
1636         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1637         ib.ptr[ib.length_dw++] = 8; /* x */
1638         ib.ptr[ib.length_dw++] = 1; /* y */
1639         ib.ptr[ib.length_dw++] = 1; /* z */
1640         ib.ptr[ib.length_dw++] =
1641                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1642
1643         /* write CS partial flush packet */
1644         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1645         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1646
1647         /* shedule the ib on the ring */
1648         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1649         if (r) {
1650                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1651                 goto fail;
1652         }
1653
1654         /* wait for the GPU to finish processing the IB */
1655         r = dma_fence_wait(f, false);
1656         if (r) {
1657                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1658                 goto fail;
1659         }
1660
1661         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1662         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1663         WREG32(mmGB_EDC_MODE, tmp);
1664
1665         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1666         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1667         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1668
1669
1670         /* read back registers to clear the counters */
1671         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1672                 RREG32(sec_ded_counter_registers[i]);
1673
1674 fail:
1675         amdgpu_ib_free(adev, &ib, NULL);
1676         dma_fence_put(f);
1677
1678         return r;
1679 }
1680
1681 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1682 {
1683         u32 gb_addr_config;
1684         u32 mc_arb_ramcfg;
1685         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1686         u32 tmp;
1687         int ret;
1688
1689         switch (adev->asic_type) {
1690         case CHIP_TOPAZ:
1691                 adev->gfx.config.max_shader_engines = 1;
1692                 adev->gfx.config.max_tile_pipes = 2;
1693                 adev->gfx.config.max_cu_per_sh = 6;
1694                 adev->gfx.config.max_sh_per_se = 1;
1695                 adev->gfx.config.max_backends_per_se = 2;
1696                 adev->gfx.config.max_texture_channel_caches = 2;
1697                 adev->gfx.config.max_gprs = 256;
1698                 adev->gfx.config.max_gs_threads = 32;
1699                 adev->gfx.config.max_hw_contexts = 8;
1700
1701                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1702                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1703                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1704                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1705                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1706                 break;
1707         case CHIP_FIJI:
1708                 adev->gfx.config.max_shader_engines = 4;
1709                 adev->gfx.config.max_tile_pipes = 16;
1710                 adev->gfx.config.max_cu_per_sh = 16;
1711                 adev->gfx.config.max_sh_per_se = 1;
1712                 adev->gfx.config.max_backends_per_se = 4;
1713                 adev->gfx.config.max_texture_channel_caches = 16;
1714                 adev->gfx.config.max_gprs = 256;
1715                 adev->gfx.config.max_gs_threads = 32;
1716                 adev->gfx.config.max_hw_contexts = 8;
1717
1718                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1719                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1720                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1721                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1722                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1723                 break;
1724         case CHIP_POLARIS11:
1725         case CHIP_POLARIS12:
1726                 ret = amdgpu_atombios_get_gfx_info(adev);
1727                 if (ret)
1728                         return ret;
1729                 adev->gfx.config.max_gprs = 256;
1730                 adev->gfx.config.max_gs_threads = 32;
1731                 adev->gfx.config.max_hw_contexts = 8;
1732
1733                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1734                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1735                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1736                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1737                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1738                 break;
1739         case CHIP_POLARIS10:
1740         case CHIP_VEGAM:
1741                 ret = amdgpu_atombios_get_gfx_info(adev);
1742                 if (ret)
1743                         return ret;
1744                 adev->gfx.config.max_gprs = 256;
1745                 adev->gfx.config.max_gs_threads = 32;
1746                 adev->gfx.config.max_hw_contexts = 8;
1747
1748                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1749                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1750                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1751                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1752                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1753                 break;
1754         case CHIP_TONGA:
1755                 adev->gfx.config.max_shader_engines = 4;
1756                 adev->gfx.config.max_tile_pipes = 8;
1757                 adev->gfx.config.max_cu_per_sh = 8;
1758                 adev->gfx.config.max_sh_per_se = 1;
1759                 adev->gfx.config.max_backends_per_se = 2;
1760                 adev->gfx.config.max_texture_channel_caches = 8;
1761                 adev->gfx.config.max_gprs = 256;
1762                 adev->gfx.config.max_gs_threads = 32;
1763                 adev->gfx.config.max_hw_contexts = 8;
1764
1765                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1766                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1767                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1768                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1769                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1770                 break;
1771         case CHIP_CARRIZO:
1772                 adev->gfx.config.max_shader_engines = 1;
1773                 adev->gfx.config.max_tile_pipes = 2;
1774                 adev->gfx.config.max_sh_per_se = 1;
1775                 adev->gfx.config.max_backends_per_se = 2;
1776                 adev->gfx.config.max_cu_per_sh = 8;
1777                 adev->gfx.config.max_texture_channel_caches = 2;
1778                 adev->gfx.config.max_gprs = 256;
1779                 adev->gfx.config.max_gs_threads = 32;
1780                 adev->gfx.config.max_hw_contexts = 8;
1781
1782                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1783                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1784                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1785                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1786                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1787                 break;
1788         case CHIP_STONEY:
1789                 adev->gfx.config.max_shader_engines = 1;
1790                 adev->gfx.config.max_tile_pipes = 2;
1791                 adev->gfx.config.max_sh_per_se = 1;
1792                 adev->gfx.config.max_backends_per_se = 1;
1793                 adev->gfx.config.max_cu_per_sh = 3;
1794                 adev->gfx.config.max_texture_channel_caches = 2;
1795                 adev->gfx.config.max_gprs = 256;
1796                 adev->gfx.config.max_gs_threads = 16;
1797                 adev->gfx.config.max_hw_contexts = 8;
1798
1799                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1800                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1801                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1802                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1803                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1804                 break;
1805         default:
1806                 adev->gfx.config.max_shader_engines = 2;
1807                 adev->gfx.config.max_tile_pipes = 4;
1808                 adev->gfx.config.max_cu_per_sh = 2;
1809                 adev->gfx.config.max_sh_per_se = 1;
1810                 adev->gfx.config.max_backends_per_se = 2;
1811                 adev->gfx.config.max_texture_channel_caches = 4;
1812                 adev->gfx.config.max_gprs = 256;
1813                 adev->gfx.config.max_gs_threads = 32;
1814                 adev->gfx.config.max_hw_contexts = 8;
1815
1816                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1817                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1818                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1819                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1820                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1821                 break;
1822         }
1823
1824         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1825         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1826
1827         adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1828                                 MC_ARB_RAMCFG, NOOFBANK);
1829         adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1830                                 MC_ARB_RAMCFG, NOOFRANKS);
1831
1832         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1833         adev->gfx.config.mem_max_burst_length_bytes = 256;
1834         if (adev->flags & AMD_IS_APU) {
1835                 /* Get memory bank mapping mode. */
1836                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1837                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1838                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1839
1840                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1841                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1842                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1843
1844                 /* Validate settings in case only one DIMM installed. */
1845                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1846                         dimm00_addr_map = 0;
1847                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1848                         dimm01_addr_map = 0;
1849                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1850                         dimm10_addr_map = 0;
1851                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1852                         dimm11_addr_map = 0;
1853
1854                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1855                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1856                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1857                         adev->gfx.config.mem_row_size_in_kb = 2;
1858                 else
1859                         adev->gfx.config.mem_row_size_in_kb = 1;
1860         } else {
1861                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1862                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1863                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1864                         adev->gfx.config.mem_row_size_in_kb = 4;
1865         }
1866
1867         adev->gfx.config.shader_engine_tile_size = 32;
1868         adev->gfx.config.num_gpus = 1;
1869         adev->gfx.config.multi_gpu_tile_size = 64;
1870
1871         /* fix up row size */
1872         switch (adev->gfx.config.mem_row_size_in_kb) {
1873         case 1:
1874         default:
1875                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1876                 break;
1877         case 2:
1878                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1879                 break;
1880         case 4:
1881                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1882                 break;
1883         }
1884         adev->gfx.config.gb_addr_config = gb_addr_config;
1885
1886         return 0;
1887 }
1888
1889 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1890                                         int mec, int pipe, int queue)
1891 {
1892         int r;
1893         unsigned irq_type;
1894         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1895
1896         ring = &adev->gfx.compute_ring[ring_id];
1897
1898         /* mec0 is me1 */
1899         ring->me = mec + 1;
1900         ring->pipe = pipe;
1901         ring->queue = queue;
1902
1903         ring->ring_obj = NULL;
1904         ring->use_doorbell = true;
1905         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1906         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1907                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1908         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1909
1910         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1911                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1912                 + ring->pipe;
1913
1914         /* type-2 packets are deprecated on MEC, use type-3 instead */
1915         r = amdgpu_ring_init(adev, ring, 1024,
1916                         &adev->gfx.eop_irq, irq_type);
1917         if (r)
1918                 return r;
1919
1920
1921         return 0;
1922 }
1923
1924 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1925
1926 static int gfx_v8_0_sw_init(void *handle)
1927 {
1928         int i, j, k, r, ring_id;
1929         struct amdgpu_ring *ring;
1930         struct amdgpu_kiq *kiq;
1931         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1932
1933         switch (adev->asic_type) {
1934         case CHIP_TONGA:
1935         case CHIP_CARRIZO:
1936         case CHIP_FIJI:
1937         case CHIP_POLARIS10:
1938         case CHIP_POLARIS11:
1939         case CHIP_POLARIS12:
1940         case CHIP_VEGAM:
1941                 adev->gfx.mec.num_mec = 2;
1942                 break;
1943         case CHIP_TOPAZ:
1944         case CHIP_STONEY:
1945         default:
1946                 adev->gfx.mec.num_mec = 1;
1947                 break;
1948         }
1949
1950         adev->gfx.mec.num_pipe_per_mec = 4;
1951         adev->gfx.mec.num_queue_per_pipe = 8;
1952
1953         /* EOP Event */
1954         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1955         if (r)
1956                 return r;
1957
1958         /* Privileged reg */
1959         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1960                               &adev->gfx.priv_reg_irq);
1961         if (r)
1962                 return r;
1963
1964         /* Privileged inst */
1965         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1966                               &adev->gfx.priv_inst_irq);
1967         if (r)
1968                 return r;
1969
1970         /* Add CP EDC/ECC irq  */
1971         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1972                               &adev->gfx.cp_ecc_error_irq);
1973         if (r)
1974                 return r;
1975
1976         /* SQ interrupts. */
1977         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1978                               &adev->gfx.sq_irq);
1979         if (r) {
1980                 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1981                 return r;
1982         }
1983
1984         INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1985
1986         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1987
1988         gfx_v8_0_scratch_init(adev);
1989
1990         r = gfx_v8_0_init_microcode(adev);
1991         if (r) {
1992                 DRM_ERROR("Failed to load gfx firmware!\n");
1993                 return r;
1994         }
1995
1996         r = adev->gfx.rlc.funcs->init(adev);
1997         if (r) {
1998                 DRM_ERROR("Failed to init rlc BOs!\n");
1999                 return r;
2000         }
2001
2002         r = gfx_v8_0_mec_init(adev);
2003         if (r) {
2004                 DRM_ERROR("Failed to init MEC BOs!\n");
2005                 return r;
2006         }
2007
2008         /* set up the gfx ring */
2009         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2010                 ring = &adev->gfx.gfx_ring[i];
2011                 ring->ring_obj = NULL;
2012                 sprintf(ring->name, "gfx");
2013                 /* no gfx doorbells on iceland */
2014                 if (adev->asic_type != CHIP_TOPAZ) {
2015                         ring->use_doorbell = true;
2016                         ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2017                 }
2018
2019                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2020                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2021                 if (r)
2022                         return r;
2023         }
2024
2025
2026         /* set up the compute queues - allocate horizontally across pipes */
2027         ring_id = 0;
2028         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2029                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2030                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2031                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2032                                         continue;
2033
2034                                 r = gfx_v8_0_compute_ring_init(adev,
2035                                                                 ring_id,
2036                                                                 i, k, j);
2037                                 if (r)
2038                                         return r;
2039
2040                                 ring_id++;
2041                         }
2042                 }
2043         }
2044
2045         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2046         if (r) {
2047                 DRM_ERROR("Failed to init KIQ BOs!\n");
2048                 return r;
2049         }
2050
2051         kiq = &adev->gfx.kiq;
2052         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2053         if (r)
2054                 return r;
2055
2056         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2057         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2058         if (r)
2059                 return r;
2060
2061         adev->gfx.ce_ram_size = 0x8000;
2062
2063         r = gfx_v8_0_gpu_early_init(adev);
2064         if (r)
2065                 return r;
2066
2067         return 0;
2068 }
2069
2070 static int gfx_v8_0_sw_fini(void *handle)
2071 {
2072         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2073         int i;
2074
2075         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2076                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2077         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2078                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2079
2080         amdgpu_gfx_mqd_sw_fini(adev);
2081         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2082         amdgpu_gfx_kiq_fini(adev);
2083
2084         gfx_v8_0_mec_fini(adev);
2085         amdgpu_gfx_rlc_fini(adev);
2086         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2087                                 &adev->gfx.rlc.clear_state_gpu_addr,
2088                                 (void **)&adev->gfx.rlc.cs_ptr);
2089         if ((adev->asic_type == CHIP_CARRIZO) ||
2090             (adev->asic_type == CHIP_STONEY)) {
2091                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2092                                 &adev->gfx.rlc.cp_table_gpu_addr,
2093                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2094         }
2095         gfx_v8_0_free_microcode(adev);
2096
2097         return 0;
2098 }
2099
2100 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2101 {
2102         uint32_t *modearray, *mod2array;
2103         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2104         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2105         u32 reg_offset;
2106
2107         modearray = adev->gfx.config.tile_mode_array;
2108         mod2array = adev->gfx.config.macrotile_mode_array;
2109
2110         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2111                 modearray[reg_offset] = 0;
2112
2113         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2114                 mod2array[reg_offset] = 0;
2115
2116         switch (adev->asic_type) {
2117         case CHIP_TOPAZ:
2118                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2119                                 PIPE_CONFIG(ADDR_SURF_P2) |
2120                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2121                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2122                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2123                                 PIPE_CONFIG(ADDR_SURF_P2) |
2124                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2125                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2126                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2127                                 PIPE_CONFIG(ADDR_SURF_P2) |
2128                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2129                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2130                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2131                                 PIPE_CONFIG(ADDR_SURF_P2) |
2132                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2133                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2134                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135                                 PIPE_CONFIG(ADDR_SURF_P2) |
2136                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2137                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2138                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2139                                 PIPE_CONFIG(ADDR_SURF_P2) |
2140                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2141                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2142                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2143                                 PIPE_CONFIG(ADDR_SURF_P2) |
2144                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2145                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2146                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2147                                 PIPE_CONFIG(ADDR_SURF_P2));
2148                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2149                                 PIPE_CONFIG(ADDR_SURF_P2) |
2150                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2151                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2152                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153                                  PIPE_CONFIG(ADDR_SURF_P2) |
2154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2156                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2157                                  PIPE_CONFIG(ADDR_SURF_P2) |
2158                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2159                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2160                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2161                                  PIPE_CONFIG(ADDR_SURF_P2) |
2162                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2163                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2164                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165                                  PIPE_CONFIG(ADDR_SURF_P2) |
2166                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2167                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2168                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2169                                  PIPE_CONFIG(ADDR_SURF_P2) |
2170                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2171                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173                                  PIPE_CONFIG(ADDR_SURF_P2) |
2174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2176                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2177                                  PIPE_CONFIG(ADDR_SURF_P2) |
2178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2180                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2181                                  PIPE_CONFIG(ADDR_SURF_P2) |
2182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2184                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2185                                  PIPE_CONFIG(ADDR_SURF_P2) |
2186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2188                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2189                                  PIPE_CONFIG(ADDR_SURF_P2) |
2190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2192                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2193                                  PIPE_CONFIG(ADDR_SURF_P2) |
2194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2196                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2197                                  PIPE_CONFIG(ADDR_SURF_P2) |
2198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2200                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2201                                  PIPE_CONFIG(ADDR_SURF_P2) |
2202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2204                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2205                                  PIPE_CONFIG(ADDR_SURF_P2) |
2206                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2207                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2208                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2209                                  PIPE_CONFIG(ADDR_SURF_P2) |
2210                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2211                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2212                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2213                                  PIPE_CONFIG(ADDR_SURF_P2) |
2214                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2215                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2216                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2217                                  PIPE_CONFIG(ADDR_SURF_P2) |
2218                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2219                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2220
2221                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2222                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2224                                 NUM_BANKS(ADDR_SURF_8_BANK));
2225                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2226                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2227                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2228                                 NUM_BANKS(ADDR_SURF_8_BANK));
2229                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2230                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2231                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2232                                 NUM_BANKS(ADDR_SURF_8_BANK));
2233                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2234                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2235                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2236                                 NUM_BANKS(ADDR_SURF_8_BANK));
2237                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2239                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240                                 NUM_BANKS(ADDR_SURF_8_BANK));
2241                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2242                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2243                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2244                                 NUM_BANKS(ADDR_SURF_8_BANK));
2245                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2246                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2247                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2248                                 NUM_BANKS(ADDR_SURF_8_BANK));
2249                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2250                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2251                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252                                 NUM_BANKS(ADDR_SURF_16_BANK));
2253                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2254                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2255                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2256                                 NUM_BANKS(ADDR_SURF_16_BANK));
2257                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2258                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2259                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2260                                  NUM_BANKS(ADDR_SURF_16_BANK));
2261                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2262                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2263                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2264                                  NUM_BANKS(ADDR_SURF_16_BANK));
2265                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2267                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268                                  NUM_BANKS(ADDR_SURF_16_BANK));
2269                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2270                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2271                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2272                                  NUM_BANKS(ADDR_SURF_16_BANK));
2273                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2276                                  NUM_BANKS(ADDR_SURF_8_BANK));
2277
2278                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2279                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2280                             reg_offset != 23)
2281                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2282
2283                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2284                         if (reg_offset != 7)
2285                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2286
2287                 break;
2288         case CHIP_FIJI:
2289         case CHIP_VEGAM:
2290                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2293                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2294                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2297                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2298                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2299                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2301                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2302                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2305                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2306                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2309                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2311                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2313                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2314                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2315                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2317                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2318                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2319                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2320                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2321                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2323                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2324                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2325                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2328                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2333                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2336                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2337                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2340                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2341                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2345                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2349                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2352                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2353                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2356                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2357                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2360                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2361                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2364                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2365                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2368                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2369                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2372                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2373                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2376                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2377                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2381                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2382                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2385                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2387                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2389                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2391                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2393                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2397                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2399                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2400                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2401                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2403                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2404                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2405                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2407                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2408                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2409                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2410                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2411                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2412
2413                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2414                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2415                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2416                                 NUM_BANKS(ADDR_SURF_8_BANK));
2417                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2418                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2419                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2420                                 NUM_BANKS(ADDR_SURF_8_BANK));
2421                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2423                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2424                                 NUM_BANKS(ADDR_SURF_8_BANK));
2425                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2427                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2428                                 NUM_BANKS(ADDR_SURF_8_BANK));
2429                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2431                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2432                                 NUM_BANKS(ADDR_SURF_8_BANK));
2433                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2435                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2436                                 NUM_BANKS(ADDR_SURF_8_BANK));
2437                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2439                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2440                                 NUM_BANKS(ADDR_SURF_8_BANK));
2441                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2443                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444                                 NUM_BANKS(ADDR_SURF_8_BANK));
2445                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448                                 NUM_BANKS(ADDR_SURF_8_BANK));
2449                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452                                  NUM_BANKS(ADDR_SURF_8_BANK));
2453                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456                                  NUM_BANKS(ADDR_SURF_8_BANK));
2457                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2459                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2460                                  NUM_BANKS(ADDR_SURF_8_BANK));
2461                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2464                                  NUM_BANKS(ADDR_SURF_8_BANK));
2465                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468                                  NUM_BANKS(ADDR_SURF_4_BANK));
2469
2470                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2471                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2472
2473                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2474                         if (reg_offset != 7)
2475                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2476
2477                 break;
2478         case CHIP_TONGA:
2479                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2482                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2483                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2486                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2487                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2488                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2490                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2491                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2492                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2494                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2495                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2498                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2500                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2502                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2503                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2504                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2506                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2507                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2508                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2509                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2510                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2512                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2513                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2514                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2516                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2517                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2520                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2521                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2522                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2524                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2525                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2526                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2527                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2528                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2529                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2532                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2536                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2537                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2538                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2540                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2541                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2544                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2545                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2546                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2547                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2548                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2549                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2550                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2552                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2553                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2554                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2556                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2557                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2558                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2560                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2561                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2562                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2564                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2565                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2566                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2568                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2570                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2571                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2574                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2576                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2578                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2580                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2582                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2586                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2588                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2589                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2590                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2592                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2593                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2594                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2597                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2598                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2599                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2601
2602                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2603                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2604                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2605                                 NUM_BANKS(ADDR_SURF_16_BANK));
2606                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2608                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2609                                 NUM_BANKS(ADDR_SURF_16_BANK));
2610                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2612                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2613                                 NUM_BANKS(ADDR_SURF_16_BANK));
2614                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2615                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2616                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2617                                 NUM_BANKS(ADDR_SURF_16_BANK));
2618                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2620                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2621                                 NUM_BANKS(ADDR_SURF_16_BANK));
2622                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2624                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2625                                 NUM_BANKS(ADDR_SURF_16_BANK));
2626                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2628                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2629                                 NUM_BANKS(ADDR_SURF_16_BANK));
2630                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2632                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2633                                 NUM_BANKS(ADDR_SURF_16_BANK));
2634                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2636                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2637                                 NUM_BANKS(ADDR_SURF_16_BANK));
2638                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2640                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2641                                  NUM_BANKS(ADDR_SURF_16_BANK));
2642                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2644                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2645                                  NUM_BANKS(ADDR_SURF_16_BANK));
2646                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2648                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2649                                  NUM_BANKS(ADDR_SURF_8_BANK));
2650                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2652                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2653                                  NUM_BANKS(ADDR_SURF_4_BANK));
2654                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2657                                  NUM_BANKS(ADDR_SURF_4_BANK));
2658
2659                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2660                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2661
2662                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2663                         if (reg_offset != 7)
2664                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2665
2666                 break;
2667         case CHIP_POLARIS11:
2668         case CHIP_POLARIS12:
2669                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2672                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2673                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2676                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2677                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2680                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2681                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2684                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2685                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2688                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2690                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2692                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2694                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2696                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2697                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2700                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2701                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2702                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2703                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2704                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2706                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2712                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2715                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2716                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2719                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2722                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2726                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2728                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2730                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2732                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2735                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2736                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2739                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2740                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2743                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2744                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2746                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2747                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2748                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2750                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2751                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2752                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2754                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2755                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2756                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2758                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2760                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2764                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2766                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2768                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2770                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2771                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2772                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2774                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2776                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2778                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2779                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2780                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2782                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2783                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2784                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2786                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2787                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2788                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2790                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2791
2792                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2794                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2795                                 NUM_BANKS(ADDR_SURF_16_BANK));
2796
2797                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2798                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2799                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2800                                 NUM_BANKS(ADDR_SURF_16_BANK));
2801
2802                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2803                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2804                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2805                                 NUM_BANKS(ADDR_SURF_16_BANK));
2806
2807                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2809                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2810                                 NUM_BANKS(ADDR_SURF_16_BANK));
2811
2812                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2814                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2815                                 NUM_BANKS(ADDR_SURF_16_BANK));
2816
2817                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2819                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2820                                 NUM_BANKS(ADDR_SURF_16_BANK));
2821
2822                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2824                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2825                                 NUM_BANKS(ADDR_SURF_16_BANK));
2826
2827                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2828                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2829                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2830                                 NUM_BANKS(ADDR_SURF_16_BANK));
2831
2832                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2833                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835                                 NUM_BANKS(ADDR_SURF_16_BANK));
2836
2837                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2839                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2840                                 NUM_BANKS(ADDR_SURF_16_BANK));
2841
2842                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2844                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2845                                 NUM_BANKS(ADDR_SURF_16_BANK));
2846
2847                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2848                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2849                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2850                                 NUM_BANKS(ADDR_SURF_16_BANK));
2851
2852                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855                                 NUM_BANKS(ADDR_SURF_8_BANK));
2856
2857                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2860                                 NUM_BANKS(ADDR_SURF_4_BANK));
2861
2862                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2863                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2864
2865                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2866                         if (reg_offset != 7)
2867                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2868
2869                 break;
2870         case CHIP_POLARIS10:
2871                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2873                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2874                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2875                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2878                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2879                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2882                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2883                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2886                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2887                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2890                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2892                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2896                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2900                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2904                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2905                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2906                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2908                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2909                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2912                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2913                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2914                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2917                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2918                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2919                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2920                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2921                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2924                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2926                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2928                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2930                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2932                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2936                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2937                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2938                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2939                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2940                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2941                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2942                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2944                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2945                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2946                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2948                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2949                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2950                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2952                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2953                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2954                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2956                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2957                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2958                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2960                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2962                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2963                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2966                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2968                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2970                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2974                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2976                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2978                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2980                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2981                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2982                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2984                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2985                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2986                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2988                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2989                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2990                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2991                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2992                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2993
2994                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2995                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2996                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2997                                 NUM_BANKS(ADDR_SURF_16_BANK));
2998
2999                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3001                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002                                 NUM_BANKS(ADDR_SURF_16_BANK));
3003
3004                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3006                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007                                 NUM_BANKS(ADDR_SURF_16_BANK));
3008
3009                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3010                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3011                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3012                                 NUM_BANKS(ADDR_SURF_16_BANK));
3013
3014                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3015                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3016                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3017                                 NUM_BANKS(ADDR_SURF_16_BANK));
3018
3019                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3021                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3022                                 NUM_BANKS(ADDR_SURF_16_BANK));
3023
3024                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3026                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3027                                 NUM_BANKS(ADDR_SURF_16_BANK));
3028
3029                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3031                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032                                 NUM_BANKS(ADDR_SURF_16_BANK));
3033
3034                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037                                 NUM_BANKS(ADDR_SURF_16_BANK));
3038
3039                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3041                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042                                 NUM_BANKS(ADDR_SURF_16_BANK));
3043
3044                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3046                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3047                                 NUM_BANKS(ADDR_SURF_16_BANK));
3048
3049                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052                                 NUM_BANKS(ADDR_SURF_8_BANK));
3053
3054                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3056                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3057                                 NUM_BANKS(ADDR_SURF_4_BANK));
3058
3059                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3061                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3062                                 NUM_BANKS(ADDR_SURF_4_BANK));
3063
3064                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3065                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3066
3067                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3068                         if (reg_offset != 7)
3069                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3070
3071                 break;
3072         case CHIP_STONEY:
3073                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3074                                 PIPE_CONFIG(ADDR_SURF_P2) |
3075                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3076                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3077                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3078                                 PIPE_CONFIG(ADDR_SURF_P2) |
3079                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3080                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3081                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3082                                 PIPE_CONFIG(ADDR_SURF_P2) |
3083                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3084                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3085                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3086                                 PIPE_CONFIG(ADDR_SURF_P2) |
3087                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3088                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3089                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090                                 PIPE_CONFIG(ADDR_SURF_P2) |
3091                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3092                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3094                                 PIPE_CONFIG(ADDR_SURF_P2) |
3095                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3096                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3097                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3098                                 PIPE_CONFIG(ADDR_SURF_P2) |
3099                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3100                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3102                                 PIPE_CONFIG(ADDR_SURF_P2));
3103                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3104                                 PIPE_CONFIG(ADDR_SURF_P2) |
3105                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3106                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3107                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108                                  PIPE_CONFIG(ADDR_SURF_P2) |
3109                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3110                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3112                                  PIPE_CONFIG(ADDR_SURF_P2) |
3113                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3114                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3115                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3116                                  PIPE_CONFIG(ADDR_SURF_P2) |
3117                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3118                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3119                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3120                                  PIPE_CONFIG(ADDR_SURF_P2) |
3121                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3122                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3123                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3124                                  PIPE_CONFIG(ADDR_SURF_P2) |
3125                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3126                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3127                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128                                  PIPE_CONFIG(ADDR_SURF_P2) |
3129                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3130                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3131                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3132                                  PIPE_CONFIG(ADDR_SURF_P2) |
3133                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3134                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3135                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3136                                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3138                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3139                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3140                                  PIPE_CONFIG(ADDR_SURF_P2) |
3141                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3142                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3143                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3144                                  PIPE_CONFIG(ADDR_SURF_P2) |
3145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3147                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3148                                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3152                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3156                                  PIPE_CONFIG(ADDR_SURF_P2) |
3157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3160                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3164                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3167                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3168                                  PIPE_CONFIG(ADDR_SURF_P2) |
3169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3171                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3172                                  PIPE_CONFIG(ADDR_SURF_P2) |
3173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3175
3176                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3177                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3178                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3179                                 NUM_BANKS(ADDR_SURF_8_BANK));
3180                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3182                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3183                                 NUM_BANKS(ADDR_SURF_8_BANK));
3184                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3187                                 NUM_BANKS(ADDR_SURF_8_BANK));
3188                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3189                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3190                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3191                                 NUM_BANKS(ADDR_SURF_8_BANK));
3192                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3194                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3195                                 NUM_BANKS(ADDR_SURF_8_BANK));
3196                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3198                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3199                                 NUM_BANKS(ADDR_SURF_8_BANK));
3200                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3202                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3203                                 NUM_BANKS(ADDR_SURF_8_BANK));
3204                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3205                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3206                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3207                                 NUM_BANKS(ADDR_SURF_16_BANK));
3208                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3209                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3210                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3211                                 NUM_BANKS(ADDR_SURF_16_BANK));
3212                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3213                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3214                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215                                  NUM_BANKS(ADDR_SURF_16_BANK));
3216                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3217                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3218                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3219                                  NUM_BANKS(ADDR_SURF_16_BANK));
3220                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3222                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3223                                  NUM_BANKS(ADDR_SURF_16_BANK));
3224                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3225                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3226                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3227                                  NUM_BANKS(ADDR_SURF_16_BANK));
3228                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3229                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3230                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3231                                  NUM_BANKS(ADDR_SURF_8_BANK));
3232
3233                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3234                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3235                             reg_offset != 23)
3236                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3237
3238                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3239                         if (reg_offset != 7)
3240                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3241
3242                 break;
3243         default:
3244                 dev_warn(adev->dev,
3245                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3246                          adev->asic_type);
3247                 /* fall through */
3248
3249         case CHIP_CARRIZO:
3250                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3251                                 PIPE_CONFIG(ADDR_SURF_P2) |
3252                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3253                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3254                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255                                 PIPE_CONFIG(ADDR_SURF_P2) |
3256                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3257                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3258                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3259                                 PIPE_CONFIG(ADDR_SURF_P2) |
3260                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3261                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3262                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3263                                 PIPE_CONFIG(ADDR_SURF_P2) |
3264                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3265                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3266                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3267                                 PIPE_CONFIG(ADDR_SURF_P2) |
3268                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3269                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3270                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3271                                 PIPE_CONFIG(ADDR_SURF_P2) |
3272                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3273                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3274                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3275                                 PIPE_CONFIG(ADDR_SURF_P2) |
3276                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3277                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3278                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3279                                 PIPE_CONFIG(ADDR_SURF_P2));
3280                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3281                                 PIPE_CONFIG(ADDR_SURF_P2) |
3282                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3283                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3284                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3285                                  PIPE_CONFIG(ADDR_SURF_P2) |
3286                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3287                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3288                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3289                                  PIPE_CONFIG(ADDR_SURF_P2) |
3290                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3291                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3292                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3293                                  PIPE_CONFIG(ADDR_SURF_P2) |
3294                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3295                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3296                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3297                                  PIPE_CONFIG(ADDR_SURF_P2) |
3298                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3299                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3300                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3301                                  PIPE_CONFIG(ADDR_SURF_P2) |
3302                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3303                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3304                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3305                                  PIPE_CONFIG(ADDR_SURF_P2) |
3306                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3307                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3308                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3309                                  PIPE_CONFIG(ADDR_SURF_P2) |
3310                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3311                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3312                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3313                                  PIPE_CONFIG(ADDR_SURF_P2) |
3314                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3315                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3316                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3317                                  PIPE_CONFIG(ADDR_SURF_P2) |
3318                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3319                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3320                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3321                                  PIPE_CONFIG(ADDR_SURF_P2) |
3322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3324                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3325                                  PIPE_CONFIG(ADDR_SURF_P2) |
3326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3328                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3329                                  PIPE_CONFIG(ADDR_SURF_P2) |
3330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3332                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3333                                  PIPE_CONFIG(ADDR_SURF_P2) |
3334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3336                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3337                                  PIPE_CONFIG(ADDR_SURF_P2) |
3338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3341                                  PIPE_CONFIG(ADDR_SURF_P2) |
3342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3344                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3345                                  PIPE_CONFIG(ADDR_SURF_P2) |
3346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3348                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3349                                  PIPE_CONFIG(ADDR_SURF_P2) |
3350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3352
3353                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3354                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3355                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3356                                 NUM_BANKS(ADDR_SURF_8_BANK));
3357                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3359                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3360                                 NUM_BANKS(ADDR_SURF_8_BANK));
3361                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3362                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3363                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3364                                 NUM_BANKS(ADDR_SURF_8_BANK));
3365                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3366                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3367                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3368                                 NUM_BANKS(ADDR_SURF_8_BANK));
3369                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3370                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3371                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3372                                 NUM_BANKS(ADDR_SURF_8_BANK));
3373                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3374                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3375                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3376                                 NUM_BANKS(ADDR_SURF_8_BANK));
3377                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3378                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3379                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3380                                 NUM_BANKS(ADDR_SURF_8_BANK));
3381                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3382                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3383                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384                                 NUM_BANKS(ADDR_SURF_16_BANK));
3385                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3386                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3387                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388                                 NUM_BANKS(ADDR_SURF_16_BANK));
3389                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3390                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3391                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3392                                  NUM_BANKS(ADDR_SURF_16_BANK));
3393                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3394                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3395                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3396                                  NUM_BANKS(ADDR_SURF_16_BANK));
3397                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3399                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3400                                  NUM_BANKS(ADDR_SURF_16_BANK));
3401                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3402                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3403                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3404                                  NUM_BANKS(ADDR_SURF_16_BANK));
3405                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3406                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3407                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3408                                  NUM_BANKS(ADDR_SURF_8_BANK));
3409
3410                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3411                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3412                             reg_offset != 23)
3413                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3414
3415                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3416                         if (reg_offset != 7)
3417                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3418
3419                 break;
3420         }
3421 }
3422
3423 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3424                                   u32 se_num, u32 sh_num, u32 instance)
3425 {
3426         u32 data;
3427
3428         if (instance == 0xffffffff)
3429                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3430         else
3431                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3432
3433         if (se_num == 0xffffffff)
3434                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3435         else
3436                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3437
3438         if (sh_num == 0xffffffff)
3439                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3440         else
3441                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3442
3443         WREG32(mmGRBM_GFX_INDEX, data);
3444 }
3445
3446 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3447                                   u32 me, u32 pipe, u32 q, u32 vm)
3448 {
3449         vi_srbm_select(adev, me, pipe, q, vm);
3450 }
3451
3452 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3453 {
3454         u32 data, mask;
3455
3456         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3457                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3458
3459         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3460
3461         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3462                                          adev->gfx.config.max_sh_per_se);
3463
3464         return (~data) & mask;
3465 }
3466
3467 static void
3468 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3469 {
3470         switch (adev->asic_type) {
3471         case CHIP_FIJI:
3472         case CHIP_VEGAM:
3473                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3474                           RB_XSEL2(1) | PKR_MAP(2) |
3475                           PKR_XSEL(1) | PKR_YSEL(1) |
3476                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3477                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3478                            SE_PAIR_YSEL(2);
3479                 break;
3480         case CHIP_TONGA:
3481         case CHIP_POLARIS10:
3482                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3483                           SE_XSEL(1) | SE_YSEL(1);
3484                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3485                            SE_PAIR_YSEL(2);
3486                 break;
3487         case CHIP_TOPAZ:
3488         case CHIP_CARRIZO:
3489                 *rconf |= RB_MAP_PKR0(2);
3490                 *rconf1 |= 0x0;
3491                 break;
3492         case CHIP_POLARIS11:
3493         case CHIP_POLARIS12:
3494                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3495                           SE_XSEL(1) | SE_YSEL(1);
3496                 *rconf1 |= 0x0;
3497                 break;
3498         case CHIP_STONEY:
3499                 *rconf |= 0x0;
3500                 *rconf1 |= 0x0;
3501                 break;
3502         default:
3503                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3504                 break;
3505         }
3506 }
3507
3508 static void
3509 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3510                                         u32 raster_config, u32 raster_config_1,
3511                                         unsigned rb_mask, unsigned num_rb)
3512 {
3513         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3514         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3515         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3516         unsigned rb_per_se = num_rb / num_se;
3517         unsigned se_mask[4];
3518         unsigned se;
3519
3520         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3521         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3522         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3523         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3524
3525         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3526         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3527         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3528
3529         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3530                              (!se_mask[2] && !se_mask[3]))) {
3531                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3532
3533                 if (!se_mask[0] && !se_mask[1]) {
3534                         raster_config_1 |=
3535                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3536                 } else {
3537                         raster_config_1 |=
3538                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3539                 }
3540         }
3541
3542         for (se = 0; se < num_se; se++) {
3543                 unsigned raster_config_se = raster_config;
3544                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3545                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3546                 int idx = (se / 2) * 2;
3547
3548                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3549                         raster_config_se &= ~SE_MAP_MASK;
3550
3551                         if (!se_mask[idx]) {
3552                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3553                         } else {
3554                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3555                         }
3556                 }
3557
3558                 pkr0_mask &= rb_mask;
3559                 pkr1_mask &= rb_mask;
3560                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3561                         raster_config_se &= ~PKR_MAP_MASK;
3562
3563                         if (!pkr0_mask) {
3564                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3565                         } else {
3566                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3567                         }
3568                 }
3569
3570                 if (rb_per_se >= 2) {
3571                         unsigned rb0_mask = 1 << (se * rb_per_se);
3572                         unsigned rb1_mask = rb0_mask << 1;
3573
3574                         rb0_mask &= rb_mask;
3575                         rb1_mask &= rb_mask;
3576                         if (!rb0_mask || !rb1_mask) {
3577                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3578
3579                                 if (!rb0_mask) {
3580                                         raster_config_se |=
3581                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3582                                 } else {
3583                                         raster_config_se |=
3584                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3585                                 }
3586                         }
3587
3588                         if (rb_per_se > 2) {
3589                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3590                                 rb1_mask = rb0_mask << 1;
3591                                 rb0_mask &= rb_mask;
3592                                 rb1_mask &= rb_mask;
3593                                 if (!rb0_mask || !rb1_mask) {
3594                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3595
3596                                         if (!rb0_mask) {
3597                                                 raster_config_se |=
3598                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3599                                         } else {
3600                                                 raster_config_se |=
3601                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3602                                         }
3603                                 }
3604                         }
3605                 }
3606
3607                 /* GRBM_GFX_INDEX has a different offset on VI */
3608                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3609                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3610                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3611         }
3612
3613         /* GRBM_GFX_INDEX has a different offset on VI */
3614         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3615 }
3616
3617 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3618 {
3619         int i, j;
3620         u32 data;
3621         u32 raster_config = 0, raster_config_1 = 0;
3622         u32 active_rbs = 0;
3623         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3624                                         adev->gfx.config.max_sh_per_se;
3625         unsigned num_rb_pipes;
3626
3627         mutex_lock(&adev->grbm_idx_mutex);
3628         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3629                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3630                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3631                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3632                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3633                                                rb_bitmap_width_per_sh);
3634                 }
3635         }
3636         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3637
3638         adev->gfx.config.backend_enable_mask = active_rbs;
3639         adev->gfx.config.num_rbs = hweight32(active_rbs);
3640
3641         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3642                              adev->gfx.config.max_shader_engines, 16);
3643
3644         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3645
3646         if (!adev->gfx.config.backend_enable_mask ||
3647                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3648                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3649                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3650         } else {
3651                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3652                                                         adev->gfx.config.backend_enable_mask,
3653                                                         num_rb_pipes);
3654         }
3655
3656         /* cache the values for userspace */
3657         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3658                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3659                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3660                         adev->gfx.config.rb_config[i][j].rb_backend_disable =
3661                                 RREG32(mmCC_RB_BACKEND_DISABLE);
3662                         adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3663                                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3664                         adev->gfx.config.rb_config[i][j].raster_config =
3665                                 RREG32(mmPA_SC_RASTER_CONFIG);
3666                         adev->gfx.config.rb_config[i][j].raster_config_1 =
3667                                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3668                 }
3669         }
3670         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3671         mutex_unlock(&adev->grbm_idx_mutex);
3672 }
3673
3674 /**
3675  * gfx_v8_0_init_compute_vmid - gart enable
3676  *
3677  * @adev: amdgpu_device pointer
3678  *
3679  * Initialize compute vmid sh_mem registers
3680  *
3681  */
3682 #define DEFAULT_SH_MEM_BASES    (0x6000)
3683 #define FIRST_COMPUTE_VMID      (8)
3684 #define LAST_COMPUTE_VMID       (16)
3685 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3686 {
3687         int i;
3688         uint32_t sh_mem_config;
3689         uint32_t sh_mem_bases;
3690
3691         /*
3692          * Configure apertures:
3693          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3694          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3695          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3696          */
3697         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3698
3699         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3700                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3701                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3702                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3703                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3704                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3705
3706         mutex_lock(&adev->srbm_mutex);
3707         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3708                 vi_srbm_select(adev, 0, 0, 0, i);
3709                 /* CP and shaders */
3710                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3711                 WREG32(mmSH_MEM_APE1_BASE, 1);
3712                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3713                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3714         }
3715         vi_srbm_select(adev, 0, 0, 0, 0);
3716         mutex_unlock(&adev->srbm_mutex);
3717
3718         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3719            acccess. These should be enabled by FW for target VMIDs. */
3720         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3721                 WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3722                 WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3723                 WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3724                 WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3725         }
3726 }
3727
3728 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3729 {
3730         int vmid;
3731
3732         /*
3733          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3734          * access. Compute VMIDs should be enabled by FW for target VMIDs,
3735          * the driver can enable them for graphics. VMID0 should maintain
3736          * access so that HWS firmware can save/restore entries.
3737          */
3738         for (vmid = 1; vmid < 16; vmid++) {
3739                 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3740                 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3741                 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3742                 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3743         }
3744 }
3745
3746 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3747 {
3748         switch (adev->asic_type) {
3749         default:
3750                 adev->gfx.config.double_offchip_lds_buf = 1;
3751                 break;
3752         case CHIP_CARRIZO:
3753         case CHIP_STONEY:
3754                 adev->gfx.config.double_offchip_lds_buf = 0;
3755                 break;
3756         }
3757 }
3758
3759 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3760 {
3761         u32 tmp, sh_static_mem_cfg;
3762         int i;
3763
3764         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3765         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3766         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3767         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3768
3769         gfx_v8_0_tiling_mode_table_init(adev);
3770         gfx_v8_0_setup_rb(adev);
3771         gfx_v8_0_get_cu_info(adev);
3772         gfx_v8_0_config_init(adev);
3773
3774         /* XXX SH_MEM regs */
3775         /* where to put LDS, scratch, GPUVM in FSA64 space */
3776         sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3777                                    SWIZZLE_ENABLE, 1);
3778         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3779                                    ELEMENT_SIZE, 1);
3780         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3781                                    INDEX_STRIDE, 3);
3782         WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3783
3784         mutex_lock(&adev->srbm_mutex);
3785         for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3786                 vi_srbm_select(adev, 0, 0, 0, i);
3787                 /* CP and shaders */
3788                 if (i == 0) {
3789                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3790                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3791                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3792                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3793                         WREG32(mmSH_MEM_CONFIG, tmp);
3794                         WREG32(mmSH_MEM_BASES, 0);
3795                 } else {
3796                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3797                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3798                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3799                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3800                         WREG32(mmSH_MEM_CONFIG, tmp);
3801                         tmp = adev->gmc.shared_aperture_start >> 48;
3802                         WREG32(mmSH_MEM_BASES, tmp);
3803                 }
3804
3805                 WREG32(mmSH_MEM_APE1_BASE, 1);
3806                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3807         }
3808         vi_srbm_select(adev, 0, 0, 0, 0);
3809         mutex_unlock(&adev->srbm_mutex);
3810
3811         gfx_v8_0_init_compute_vmid(adev);
3812         gfx_v8_0_init_gds_vmid(adev);
3813
3814         mutex_lock(&adev->grbm_idx_mutex);
3815         /*
3816          * making sure that the following register writes will be broadcasted
3817          * to all the shaders
3818          */
3819         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3820
3821         WREG32(mmPA_SC_FIFO_SIZE,
3822                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3823                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3824                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3825                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3826                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3827                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3828                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3829                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3830
3831         tmp = RREG32(mmSPI_ARB_PRIORITY);
3832         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3833         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3834         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3835         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3836         WREG32(mmSPI_ARB_PRIORITY, tmp);
3837
3838         mutex_unlock(&adev->grbm_idx_mutex);
3839
3840 }
3841
3842 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3843 {
3844         u32 i, j, k;
3845         u32 mask;
3846
3847         mutex_lock(&adev->grbm_idx_mutex);
3848         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3849                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3850                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3851                         for (k = 0; k < adev->usec_timeout; k++) {
3852                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3853                                         break;
3854                                 udelay(1);
3855                         }
3856                         if (k == adev->usec_timeout) {
3857                                 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3858                                                       0xffffffff, 0xffffffff);
3859                                 mutex_unlock(&adev->grbm_idx_mutex);
3860                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3861                                          i, j);
3862                                 return;
3863                         }
3864                 }
3865         }
3866         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3867         mutex_unlock(&adev->grbm_idx_mutex);
3868
3869         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3870                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3871                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3872                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3873         for (k = 0; k < adev->usec_timeout; k++) {
3874                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3875                         break;
3876                 udelay(1);
3877         }
3878 }
3879
3880 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3881                                                bool enable)
3882 {
3883         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3884
3885         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3886         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3887         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3888         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3889
3890         WREG32(mmCP_INT_CNTL_RING0, tmp);
3891 }
3892
3893 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3894 {
3895         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3896         /* csib */
3897         WREG32(mmRLC_CSIB_ADDR_HI,
3898                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3899         WREG32(mmRLC_CSIB_ADDR_LO,
3900                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3901         WREG32(mmRLC_CSIB_LENGTH,
3902                         adev->gfx.rlc.clear_state_size);
3903 }
3904
/*
 * gfx_v8_0_parse_ind_reg_list - index the indirect part of a register list
 *
 * @register_list_format: register list format buffer, modified in place
 * @ind_offset: dword offset at which parsing starts
 * @list_size: total number of dwords in @register_list_format
 * @unique_indices: table collecting every distinct index value encountered
 * @indices_count: in/out number of used slots in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: table receiving the start offset of every sub-list
 * @offset_count: in/out number of used slots in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * Sub-lists are separated by a 0xFFFFFFFF dword. Within a sub-list, entries
 * are consumed three dwords at a time; the third dword of each entry is looked
 * up in (or appended to) @unique_indices and then overwritten with its slot
 * number in that table.
 *
 * BUG()s as soon as either table becomes completely full. Parsing stops early
 * if the buffer ends in the middle of an entry.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			/* first dword after a separator starts a new sub-list */
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/*
		 * The index dword sits two dwords further on. A truncated
		 * trailing entry would place it past the end of the buffer,
		 * so stop instead of reading and writing out of bounds.
		 */
		if (list_size - ind_offset <= 2)
			break;

		ind_offset += 2;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append it to the table */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw index value by its slot number */
		register_list_format[ind_offset] = indices;
	}
}
3954
3955 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3956 {
3957         int i, temp, data;
3958         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3959         int indices_count = 0;
3960         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3961         int offset_count = 0;
3962
3963         int list_size;
3964         unsigned int *register_list_format =
3965                 kmemdup(adev->gfx.rlc.register_list_format,
3966                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3967         if (!register_list_format)
3968                 return -ENOMEM;
3969
3970         gfx_v8_0_parse_ind_reg_list(register_list_format,
3971                                 RLC_FormatDirectRegListLength,
3972                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3973                                 unique_indices,
3974                                 &indices_count,
3975                                 ARRAY_SIZE(unique_indices),
3976                                 indirect_start_offsets,
3977                                 &offset_count,
3978                                 ARRAY_SIZE(indirect_start_offsets));
3979
3980         /* save and restore list */
3981         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3982
3983         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3984         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3985                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3986
3987         /* indirect list */
3988         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3989         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3990                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3991
3992         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3993         list_size = list_size >> 1;
3994         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3995         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3996
3997         /* starting offsets starts */
3998         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3999                 adev->gfx.rlc.starting_offsets_start);
4000         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4001                 WREG32(mmRLC_GPM_SCRATCH_DATA,
4002                                 indirect_start_offsets[i]);
4003
4004         /* unique indices */
4005         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4006         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4007         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4008                 if (unique_indices[i] != 0) {
4009                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4010                         WREG32(data + i, unique_indices[i] >> 20);
4011                 }
4012         }
4013         kfree(register_list_format);
4014
4015         return 0;
4016 }
4017
4018 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4019 {
4020         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4021 }
4022
4023 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4024 {
4025         uint32_t data;
4026
4027         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4028
4029         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4030         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4031         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4032         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4033         WREG32(mmRLC_PG_DELAY, data);
4034
4035         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4036         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4037
4038 }
4039
4040 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4041                                                 bool enable)
4042 {
4043         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4044 }
4045
4046 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4047                                                   bool enable)
4048 {
4049         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4050 }
4051
4052 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4053 {
4054         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4055 }
4056
4057 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4058 {
4059         if ((adev->asic_type == CHIP_CARRIZO) ||
4060             (adev->asic_type == CHIP_STONEY)) {
4061                 gfx_v8_0_init_csb(adev);
4062                 gfx_v8_0_init_save_restore_list(adev);
4063                 gfx_v8_0_enable_save_restore_machine(adev);
4064                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4065                 gfx_v8_0_init_power_gating(adev);
4066                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4067         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4068                    (adev->asic_type == CHIP_POLARIS12) ||
4069                    (adev->asic_type == CHIP_VEGAM)) {
4070                 gfx_v8_0_init_csb(adev);
4071                 gfx_v8_0_init_save_restore_list(adev);
4072                 gfx_v8_0_enable_save_restore_machine(adev);
4073                 gfx_v8_0_init_power_gating(adev);
4074         }
4075
4076 }
4077
/*
 * Stop the RLC: clear RLC_CNTL.RLC_ENABLE_F32, turn the GUI idle interrupt
 * off, then wait through gfx_v8_0_wait_for_rlc_serdes().
 */
4078 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4079 {
4080         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4081
4082         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4083         gfx_v8_0_wait_for_rlc_serdes(adev);
4084 }
4085
/*
 * Pulse the RLC soft reset: set GRBM_SOFT_RESET.SOFT_RESET_RLC, wait 50us,
 * clear it again and wait another 50us.
 */
4086 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4087 {
4088         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4089         udelay(50);
4090
4091         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4092         udelay(50);
4093 }
4094
/*
 * Start the RLC by setting RLC_CNTL.RLC_ENABLE_F32, then wait 50us.
 * On non-APU parts the GUI idle interrupt is enabled here as well.
 */
4095 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4096 {
4097         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4098
4099         /* APUs (e.g. Carrizo) enable the CP interrupt later, once the CP is initialized */
4100         if (!(adev->flags & AMD_IS_APU))
4101                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4102
4103         udelay(50);
4104 }
4105
4106 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4107 {
4108         if (amdgpu_sriov_vf(adev)) {
4109                 gfx_v8_0_init_csb(adev);
4110                 return 0;
4111         }
4112
4113         adev->gfx.rlc.funcs->stop(adev);
4114         adev->gfx.rlc.funcs->reset(adev);
4115         gfx_v8_0_init_pg(adev);
4116         adev->gfx.rlc.funcs->start(adev);
4117
4118         return 0;
4119 }
4120
4121 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4122 {
4123         int i;
4124         u32 tmp = RREG32(mmCP_ME_CNTL);
4125
4126         if (enable) {
4127                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4128                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4129                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4130         } else {
4131                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4132                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4133                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4134                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4135                         adev->gfx.gfx_ring[i].sched.ready = false;
4136         }
4137         WREG32(mmCP_ME_CNTL, tmp);
4138         udelay(50);
4139 }
4140
4141 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4142 {
4143         u32 count = 0;
4144         const struct cs_section_def *sect = NULL;
4145         const struct cs_extent_def *ext = NULL;
4146
4147         /* begin clear state */
4148         count += 2;
4149         /* context control state */
4150         count += 3;
4151
4152         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4153                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4154                         if (sect->id == SECT_CONTEXT)
4155                                 count += 2 + ext->reg_count;
4156                         else
4157                                 return 0;
4158                 }
4159         }
4160         /* pa_sc_raster_config/pa_sc_raster_config1 */
4161         count += 4;
4162         /* end clear state */
4163         count += 2;
4164         /* clear state */
4165         count += 2;
4166
4167         return count;
4168 }
4169
4170 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4171 {
4172         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4173         const struct cs_section_def *sect = NULL;
4174         const struct cs_extent_def *ext = NULL;
4175         int r, i;
4176
4177         /* init the CP */
4178         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4179         WREG32(mmCP_ENDIAN_SWAP, 0);
4180         WREG32(mmCP_DEVICE_ID, 1);
4181
4182         gfx_v8_0_cp_gfx_enable(adev, true);
4183
4184         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4185         if (r) {
4186                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4187                 return r;
4188         }
4189
4190         /* clear state buffer */
4191         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4192         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4193
4194         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4195         amdgpu_ring_write(ring, 0x80000000);
4196         amdgpu_ring_write(ring, 0x80000000);
4197
4198         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4199                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4200                         if (sect->id == SECT_CONTEXT) {
4201                                 amdgpu_ring_write(ring,
4202                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4203                                                ext->reg_count));
4204                                 amdgpu_ring_write(ring,
4205                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4206                                 for (i = 0; i < ext->reg_count; i++)
4207                                         amdgpu_ring_write(ring, ext->extent[i]);
4208                         }
4209                 }
4210         }
4211
4212         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4213         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4214         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4215         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4216
4217         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4218         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4219
4220         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4221         amdgpu_ring_write(ring, 0);
4222
4223         /* init the CE partitions */
4224         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4225         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4226         amdgpu_ring_write(ring, 0x8000);
4227         amdgpu_ring_write(ring, 0x8000);
4228
4229         amdgpu_ring_commit(ring);
4230
4231         return 0;
4232 }
4233 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4234 {
4235         u32 tmp;
4236         /* no gfx doorbells on iceland */
4237         if (adev->asic_type == CHIP_TOPAZ)
4238                 return;
4239
4240         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4241
4242         if (ring->use_doorbell) {
4243                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4244                                 DOORBELL_OFFSET, ring->doorbell_index);
4245                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4246                                                 DOORBELL_HIT, 0);
4247                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4248                                             DOORBELL_EN, 1);
4249         } else {
4250                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4251         }
4252
4253         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4254
4255         if (adev->flags & AMD_IS_APU)
4256                 return;
4257
4258         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4259                                         DOORBELL_RANGE_LOWER,
4260                                         adev->doorbell_index.gfx_ring0);
4261         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4262
4263         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4264                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4265 }
4266
4267 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4268 {
4269         struct amdgpu_ring *ring;
4270         u32 tmp;
4271         u32 rb_bufsz;
4272         u64 rb_addr, rptr_addr, wptr_gpu_addr;
4273
4274         /* Set the write pointer delay */
4275         WREG32(mmCP_RB_WPTR_DELAY, 0);
4276
4277         /* set the RB to use vmid 0 */
4278         WREG32(mmCP_RB_VMID, 0);
4279
4280         /* Set ring buffer size */
4281         ring = &adev->gfx.gfx_ring[0];
4282         rb_bufsz = order_base_2(ring->ring_size / 8);
4283         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4284         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4285         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4286         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4287 #ifdef __BIG_ENDIAN
4288         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4289 #endif
4290         WREG32(mmCP_RB0_CNTL, tmp);
4291
4292         /* Initialize the ring buffer's read and write pointers */
4293         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4294         ring->wptr = 0;
4295         WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4296
4297         /* set the wb address wether it's enabled or not */
4298         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4299         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4300         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4301
4302         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4303         WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4304         WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4305         mdelay(1);
4306         WREG32(mmCP_RB0_CNTL, tmp);
4307
4308         rb_addr = ring->gpu_addr >> 8;
4309         WREG32(mmCP_RB0_BASE, rb_addr);
4310         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4311
4312         gfx_v8_0_set_cpg_door_bell(adev, ring);
4313         /* start the ring */
4314         amdgpu_ring_clear_ring(ring);
4315         gfx_v8_0_cp_gfx_start(adev);
4316         ring->sched.ready = true;
4317
4318         return 0;
4319 }
4320
4321 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4322 {
4323         int i;
4324
4325         if (enable) {
4326                 WREG32(mmCP_MEC_CNTL, 0);
4327         } else {
4328                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4329                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4330                         adev->gfx.compute_ring[i].sched.ready = false;
4331                 adev->gfx.kiq.ring.sched.ready = false;
4332         }
4333         udelay(50);
4334 }
4335
4336 /* KIQ functions */
4337 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4338 {
4339         uint32_t tmp;
4340         struct amdgpu_device *adev = ring->adev;
4341
4342         /* tell RLC which is KIQ queue */
4343         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4344         tmp &= 0xffffff00;
4345         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4346         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4347         tmp |= 0x80;
4348         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4349 }
4350
4351 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4352 {
4353         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4354         uint64_t queue_mask = 0;
4355         int r, i;
4356
4357         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4358                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4359                         continue;
4360
4361                 /* This situation may be hit in the future if a new HW
4362                  * generation exposes more than 64 queues. If so, the
4363                  * definition of queue_mask needs updating */
4364                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4365                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4366                         break;
4367                 }
4368
4369                 queue_mask |= (1ull << i);
4370         }
4371
4372         r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4373         if (r) {
4374                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4375                 return r;
4376         }
4377         /* set resources */
4378         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4379         amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4380         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4381         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4382         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4383         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4384         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4385         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4386         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4387                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4388                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4389                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4390
4391                 /* map queues */
4392                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4393                 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4394                 amdgpu_ring_write(kiq_ring,
4395                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4396                 amdgpu_ring_write(kiq_ring,
4397                                   PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4398                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4399                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4400                                   PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4401                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4402                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4403                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4404                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4405         }
4406
4407         amdgpu_ring_commit(kiq_ring);
4408
4409         return 0;
4410 }
4411
4412 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4413 {
4414         int i, r = 0;
4415
4416         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4417                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4418                 for (i = 0; i < adev->usec_timeout; i++) {
4419                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4420                                 break;
4421                         udelay(1);
4422                 }
4423                 if (i == adev->usec_timeout)
4424                         r = -ETIMEDOUT;
4425         }
4426         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4427         WREG32(mmCP_HQD_PQ_RPTR, 0);
4428         WREG32(mmCP_HQD_PQ_WPTR, 0);
4429
4430         return r;
4431 }
4432
4433 static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4434 {
4435         struct amdgpu_device *adev = ring->adev;
4436
4437         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4438                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
4439                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4440                         ring->has_high_prio = true;
4441                         mqd->cp_hqd_queue_priority =
4442                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4443                 } else {
4444                         ring->has_high_prio = false;
4445                 }
4446         }
4447 }
4448
4449 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4450 {
4451         struct amdgpu_device *adev = ring->adev;
4452         struct vi_mqd *mqd = ring->mqd_ptr;
4453         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4454         uint32_t tmp;
4455
4456         mqd->header = 0xC0310800;
4457         mqd->compute_pipelinestat_enable = 0x00000001;
4458         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4459         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4460         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4461         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4462         mqd->compute_misc_reserved = 0x00000003;
4463         mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4464                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4465         mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4466                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4467         eop_base_addr = ring->eop_gpu_addr >> 8;
4468         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4469         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4470
4471         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4472         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4473         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4474                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4475
4476         mqd->cp_hqd_eop_control = tmp;
4477
4478         /* enable doorbell? */
4479         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4480                             CP_HQD_PQ_DOORBELL_CONTROL,
4481                             DOORBELL_EN,
4482                             ring->use_doorbell ? 1 : 0);
4483
4484         mqd->cp_hqd_pq_doorbell_control = tmp;
4485
4486         /* set the pointer to the MQD */
4487         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4488         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4489
4490         /* set MQD vmid to 0 */
4491         tmp = RREG32(mmCP_MQD_CONTROL);
4492         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4493         mqd->cp_mqd_control = tmp;
4494
4495         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4496         hqd_gpu_addr = ring->gpu_addr >> 8;
4497         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4498         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4499
4500         /* set up the HQD, this is similar to CP_RB0_CNTL */
4501         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4502         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4503                             (order_base_2(ring->ring_size / 4) - 1));
4504         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4505                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4506 #ifdef __BIG_ENDIAN
4507         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4508 #endif
4509         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4510         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4511         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4512         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4513         mqd->cp_hqd_pq_control = tmp;
4514
4515         /* set the wb address whether it's enabled or not */
4516         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4517         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4518         mqd->cp_hqd_pq_rptr_report_addr_hi =
4519                 upper_32_bits(wb_gpu_addr) & 0xffff;
4520
4521         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4522         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4523         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4524         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4525
4526         tmp = 0;
4527         /* enable the doorbell if requested */
4528         if (ring->use_doorbell) {
4529                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4530                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4531                                 DOORBELL_OFFSET, ring->doorbell_index);
4532
4533                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4534                                          DOORBELL_EN, 1);
4535                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4536                                          DOORBELL_SOURCE, 0);
4537                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4538                                          DOORBELL_HIT, 0);
4539         }
4540
4541         mqd->cp_hqd_pq_doorbell_control = tmp;
4542
4543         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4544         ring->wptr = 0;
4545         mqd->cp_hqd_pq_wptr = ring->wptr;
4546         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4547
4548         /* set the vmid for the queue */
4549         mqd->cp_hqd_vmid = 0;
4550
4551         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4552         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4553         mqd->cp_hqd_persistent_state = tmp;
4554
4555         /* set MTYPE */
4556         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4557         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4558         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4559         mqd->cp_hqd_ib_control = tmp;
4560
4561         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4562         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4563         mqd->cp_hqd_iq_timer = tmp;
4564
4565         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4566         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4567         mqd->cp_hqd_ctx_save_control = tmp;
4568
4569         /* defaults */
4570         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4571         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4572         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4573         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4574         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4575         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4576         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4577         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4578         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4579         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4580         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4581         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4582
4583         /* set static priority for a queue/ring */
4584         gfx_v8_0_mqd_set_priority(ring, mqd);
4585         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4586
4587         /* map_queues packet doesn't need activate the queue,
4588          * so only kiq need set this field.
4589          */
4590         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4591                 mqd->cp_hqd_active = 1;
4592
4593         return 0;
4594 }
4595
4596 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4597                         struct vi_mqd *mqd)
4598 {
4599         uint32_t mqd_reg;
4600         uint32_t *mqd_data;
4601
4602         /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4603         mqd_data = &mqd->cp_mqd_base_addr_lo;
4604
4605         /* disable wptr polling */
4606         WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4607
4608         /* program all HQD registers */
4609         for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4610                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4611
4612         /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4613          * This is safe since EOP RPTR==WPTR for any inactive HQD
4614          * on ASICs that do not support context-save.
4615          * EOP writes/reads can start anywhere in the ring.
4616          */
4617         if (adev->asic_type != CHIP_TONGA) {
4618                 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4619                 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4620                 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4621         }
4622
4623         for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4624                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4625
4626         /* activate the HQD */
4627         for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4628                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4629
4630         return 0;
4631 }
4632
4633 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4634 {
4635         struct amdgpu_device *adev = ring->adev;
4636         struct vi_mqd *mqd = ring->mqd_ptr;
4637         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4638
4639         gfx_v8_0_kiq_setting(ring);
4640
4641         if (adev->in_gpu_reset) { /* for GPU_RESET case */
4642                 /* reset MQD to a clean status */
4643                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4644                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4645
4646                 /* reset ring buffer */
4647                 ring->wptr = 0;
4648                 amdgpu_ring_clear_ring(ring);
4649                 mutex_lock(&adev->srbm_mutex);
4650                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4651                 gfx_v8_0_mqd_commit(adev, mqd);
4652                 vi_srbm_select(adev, 0, 0, 0, 0);
4653                 mutex_unlock(&adev->srbm_mutex);
4654         } else {
4655                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4656                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4657                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4658                 mutex_lock(&adev->srbm_mutex);
4659                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4660                 gfx_v8_0_mqd_init(ring);
4661                 gfx_v8_0_mqd_commit(adev, mqd);
4662                 vi_srbm_select(adev, 0, 0, 0, 0);
4663                 mutex_unlock(&adev->srbm_mutex);
4664
4665                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4666                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4667         }
4668
4669         return 0;
4670 }
4671
4672 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4673 {
4674         struct amdgpu_device *adev = ring->adev;
4675         struct vi_mqd *mqd = ring->mqd_ptr;
4676         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4677
4678         if (!adev->in_gpu_reset && !adev->in_suspend) {
4679                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4680                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4681                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4682                 mutex_lock(&adev->srbm_mutex);
4683                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4684                 gfx_v8_0_mqd_init(ring);
4685                 vi_srbm_select(adev, 0, 0, 0, 0);
4686                 mutex_unlock(&adev->srbm_mutex);
4687
4688                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4689                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4690         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4691                 /* reset MQD to a clean status */
4692                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4693                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4694                 /* reset ring buffer */
4695                 ring->wptr = 0;
4696                 amdgpu_ring_clear_ring(ring);
4697         } else {
4698                 amdgpu_ring_clear_ring(ring);
4699         }
4700         return 0;
4701 }
4702
/*
 * Program the MEC doorbell range and enable doorbells.
 *
 * For ASIC types ordered after CHIP_TONGA the lower/upper range registers
 * are written from the KIQ and mec_ring7 doorbell indices, each shifted
 * left by 2.  CP_PQ_STATUS.DOORBELL_ENABLE is set on every ASIC.
 */
4703 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4704 {
4705         if (adev->asic_type > CHIP_TONGA) {
4706                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4707                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4708         }
4709         /* enable doorbells */
4710         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4711 }
4712
4713 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4714 {
4715         struct amdgpu_ring *ring;
4716         int r;
4717
4718         ring = &adev->gfx.kiq.ring;
4719
4720         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4721         if (unlikely(r != 0))
4722                 return r;
4723
4724         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4725         if (unlikely(r != 0))
4726                 return r;
4727
4728         gfx_v8_0_kiq_init_queue(ring);
4729         amdgpu_bo_kunmap(ring->mqd_obj);
4730         ring->mqd_ptr = NULL;
4731         amdgpu_bo_unreserve(ring->mqd_obj);
4732         ring->sched.ready = true;
4733         return 0;
4734 }
4735
4736 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4737 {
4738         struct amdgpu_ring *ring = NULL;
4739         int r = 0, i;
4740
4741         gfx_v8_0_cp_compute_enable(adev, true);
4742
4743         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4744                 ring = &adev->gfx.compute_ring[i];
4745
4746                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4747                 if (unlikely(r != 0))
4748                         goto done;
4749                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4750                 if (!r) {
4751                         r = gfx_v8_0_kcq_init_queue(ring);
4752                         amdgpu_bo_kunmap(ring->mqd_obj);
4753                         ring->mqd_ptr = NULL;
4754                 }
4755                 amdgpu_bo_unreserve(ring->mqd_obj);
4756                 if (r)
4757                         goto done;
4758         }
4759
4760         gfx_v8_0_set_mec_doorbell_range(adev);
4761
4762         r = gfx_v8_0_kiq_kcq_enable(adev);
4763         if (r)
4764                 goto done;
4765
4766 done:
4767         return r;
4768 }
4769
4770 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4771 {
4772         int r, i;
4773         struct amdgpu_ring *ring;
4774
4775         /* collect all the ring_tests here, gfx, kiq, compute */
4776         ring = &adev->gfx.gfx_ring[0];
4777         r = amdgpu_ring_test_helper(ring);
4778         if (r)
4779                 return r;
4780
4781         ring = &adev->gfx.kiq.ring;
4782         r = amdgpu_ring_test_helper(ring);
4783         if (r)
4784                 return r;
4785
4786         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4787                 ring = &adev->gfx.compute_ring[i];
4788                 amdgpu_ring_test_helper(ring);
4789         }
4790
4791         return 0;
4792 }
4793
4794 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4795 {
4796         int r;
4797
4798         if (!(adev->flags & AMD_IS_APU))
4799                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4800
4801         r = gfx_v8_0_kiq_resume(adev);
4802         if (r)
4803                 return r;
4804
4805         r = gfx_v8_0_cp_gfx_resume(adev);
4806         if (r)
4807                 return r;
4808
4809         r = gfx_v8_0_kcq_resume(adev);
4810         if (r)
4811                 return r;
4812
4813         r = gfx_v8_0_cp_test_all_rings(adev);
4814         if (r)
4815                 return r;
4816
4817         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4818
4819         return 0;
4820 }
4821
4822 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4823 {
4824         gfx_v8_0_cp_gfx_enable(adev, enable);
4825         gfx_v8_0_cp_compute_enable(adev, enable);
4826 }
4827
4828 static int gfx_v8_0_hw_init(void *handle)
4829 {
4830         int r;
4831         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4832
4833         gfx_v8_0_init_golden_registers(adev);
4834         gfx_v8_0_constants_init(adev);
4835
4836         r = adev->gfx.rlc.funcs->resume(adev);
4837         if (r)
4838                 return r;
4839
4840         r = gfx_v8_0_cp_resume(adev);
4841
4842         return r;
4843 }
4844
4845 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4846 {
4847         int r, i;
4848         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4849
4850         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4851         if (r)
4852                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4853
4854         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4855                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4856
4857                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4858                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4859                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4860                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4861                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4862                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4863                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4864                 amdgpu_ring_write(kiq_ring, 0);
4865                 amdgpu_ring_write(kiq_ring, 0);
4866                 amdgpu_ring_write(kiq_ring, 0);
4867         }
4868         r = amdgpu_ring_test_helper(kiq_ring);
4869         if (r)
4870                 DRM_ERROR("KCQ disable failed\n");
4871
4872         return r;
4873 }
4874
4875 static bool gfx_v8_0_is_idle(void *handle)
4876 {
4877         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4878
4879         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4880                 || RREG32(mmGRBM_STATUS2) != 0x8)
4881                 return false;
4882         else
4883                 return true;
4884 }
4885
4886 static bool gfx_v8_0_rlc_is_idle(void *handle)
4887 {
4888         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4889
4890         if (RREG32(mmGRBM_STATUS2) != 0x8)
4891                 return false;
4892         else
4893                 return true;
4894 }
4895
4896 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4897 {
4898         unsigned int i;
4899         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4900
4901         for (i = 0; i < adev->usec_timeout; i++) {
4902                 if (gfx_v8_0_rlc_is_idle(handle))
4903                         return 0;
4904
4905                 udelay(1);
4906         }
4907         return -ETIMEDOUT;
4908 }
4909
4910 static int gfx_v8_0_wait_for_idle(void *handle)
4911 {
4912         unsigned int i;
4913         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4914
4915         for (i = 0; i < adev->usec_timeout; i++) {
4916                 if (gfx_v8_0_is_idle(handle))
4917                         return 0;
4918
4919                 udelay(1);
4920         }
4921         return -ETIMEDOUT;
4922 }
4923
4924 static int gfx_v8_0_hw_fini(void *handle)
4925 {
4926         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4927
4928         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4929         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4930
4931         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4932
4933         amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4934
4935         /* disable KCQ to avoid CPC touch memory not valid anymore */
4936         gfx_v8_0_kcq_disable(adev);
4937
4938         if (amdgpu_sriov_vf(adev)) {
4939                 pr_debug("For SRIOV client, shouldn't do anything.\n");
4940                 return 0;
4941         }
4942         amdgpu_gfx_rlc_enter_safe_mode(adev);
4943         if (!gfx_v8_0_wait_for_idle(adev))
4944                 gfx_v8_0_cp_enable(adev, false);
4945         else
4946                 pr_err("cp is busy, skip halt cp\n");
4947         if (!gfx_v8_0_wait_for_rlc_idle(adev))
4948                 adev->gfx.rlc.funcs->stop(adev);
4949         else
4950                 pr_err("rlc is busy, skip halt rlc\n");
4951         amdgpu_gfx_rlc_exit_safe_mode(adev);
4952
4953         return 0;
4954 }
4955
/* Suspend is implemented as a plain hardware teardown; returns its result. */
static int gfx_v8_0_suspend(void *handle)
{
        int ret = gfx_v8_0_hw_fini(handle);

        return ret;
}
4960
/* Resume is implemented as a plain hardware init; returns its result. */
static int gfx_v8_0_resume(void *handle)
{
        int ret = gfx_v8_0_hw_init(handle);

        return ret;
}
4965
4966 static bool gfx_v8_0_check_soft_reset(void *handle)
4967 {
4968         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4969         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4970         u32 tmp;
4971
4972         /* GRBM_STATUS */
4973         tmp = RREG32(mmGRBM_STATUS);
4974         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4975                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4976                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4977                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4978                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4979                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4980                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4981                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4982                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4983                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4984                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4985                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4986                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4987         }
4988
4989         /* GRBM_STATUS2 */
4990         tmp = RREG32(mmGRBM_STATUS2);
4991         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4992                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4993                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4994
4995         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4996             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4997             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4998                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4999                                                 SOFT_RESET_CPF, 1);
5000                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5001                                                 SOFT_RESET_CPC, 1);
5002                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5003                                                 SOFT_RESET_CPG, 1);
5004                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5005                                                 SOFT_RESET_GRBM, 1);
5006         }
5007
5008         /* SRBM_STATUS */
5009         tmp = RREG32(mmSRBM_STATUS);
5010         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5011                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5012                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5013         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5014                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5015                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5016
5017         if (grbm_soft_reset || srbm_soft_reset) {
5018                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5019                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5020                 return true;
5021         } else {
5022                 adev->gfx.grbm_soft_reset = 0;
5023                 adev->gfx.srbm_soft_reset = 0;
5024                 return false;
5025         }
5026 }
5027
5028 static int gfx_v8_0_pre_soft_reset(void *handle)
5029 {
5030         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5031         u32 grbm_soft_reset = 0;
5032
5033         if ((!adev->gfx.grbm_soft_reset) &&
5034             (!adev->gfx.srbm_soft_reset))
5035                 return 0;
5036
5037         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5038
5039         /* stop the rlc */
5040         adev->gfx.rlc.funcs->stop(adev);
5041
5042         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5043             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5044                 /* Disable GFX parsing/prefetching */
5045                 gfx_v8_0_cp_gfx_enable(adev, false);
5046
5047         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5048             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5049             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5050             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5051                 int i;
5052
5053                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5054                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5055
5056                         mutex_lock(&adev->srbm_mutex);
5057                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5058                         gfx_v8_0_deactivate_hqd(adev, 2);
5059                         vi_srbm_select(adev, 0, 0, 0, 0);
5060                         mutex_unlock(&adev->srbm_mutex);
5061                 }
5062                 /* Disable MEC parsing/prefetching */
5063                 gfx_v8_0_cp_compute_enable(adev, false);
5064         }
5065
5066        return 0;
5067 }
5068
5069 static int gfx_v8_0_soft_reset(void *handle)
5070 {
5071         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5072         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5073         u32 tmp;
5074
5075         if ((!adev->gfx.grbm_soft_reset) &&
5076             (!adev->gfx.srbm_soft_reset))
5077                 return 0;
5078
5079         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5080         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5081
5082         if (grbm_soft_reset || srbm_soft_reset) {
5083                 tmp = RREG32(mmGMCON_DEBUG);
5084                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5085                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5086                 WREG32(mmGMCON_DEBUG, tmp);
5087                 udelay(50);
5088         }
5089
5090         if (grbm_soft_reset) {
5091                 tmp = RREG32(mmGRBM_SOFT_RESET);
5092                 tmp |= grbm_soft_reset;
5093                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5094                 WREG32(mmGRBM_SOFT_RESET, tmp);
5095                 tmp = RREG32(mmGRBM_SOFT_RESET);
5096
5097                 udelay(50);
5098
5099                 tmp &= ~grbm_soft_reset;
5100                 WREG32(mmGRBM_SOFT_RESET, tmp);
5101                 tmp = RREG32(mmGRBM_SOFT_RESET);
5102         }
5103
5104         if (srbm_soft_reset) {
5105                 tmp = RREG32(mmSRBM_SOFT_RESET);
5106                 tmp |= srbm_soft_reset;
5107                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5108                 WREG32(mmSRBM_SOFT_RESET, tmp);
5109                 tmp = RREG32(mmSRBM_SOFT_RESET);
5110
5111                 udelay(50);
5112
5113                 tmp &= ~srbm_soft_reset;
5114                 WREG32(mmSRBM_SOFT_RESET, tmp);
5115                 tmp = RREG32(mmSRBM_SOFT_RESET);
5116         }
5117
5118         if (grbm_soft_reset || srbm_soft_reset) {
5119                 tmp = RREG32(mmGMCON_DEBUG);
5120                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5121                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5122                 WREG32(mmGMCON_DEBUG, tmp);
5123         }
5124
5125         /* Wait a little for things to settle down */
5126         udelay(50);
5127
5128         return 0;
5129 }
5130
5131 static int gfx_v8_0_post_soft_reset(void *handle)
5132 {
5133         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5134         u32 grbm_soft_reset = 0;
5135
5136         if ((!adev->gfx.grbm_soft_reset) &&
5137             (!adev->gfx.srbm_soft_reset))
5138                 return 0;
5139
5140         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5141
5142         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5143             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5144             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5145             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5146                 int i;
5147
5148                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5149                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5150
5151                         mutex_lock(&adev->srbm_mutex);
5152                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5153                         gfx_v8_0_deactivate_hqd(adev, 2);
5154                         vi_srbm_select(adev, 0, 0, 0, 0);
5155                         mutex_unlock(&adev->srbm_mutex);
5156                 }
5157                 gfx_v8_0_kiq_resume(adev);
5158                 gfx_v8_0_kcq_resume(adev);
5159         }
5160
5161         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5162             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5163                 gfx_v8_0_cp_gfx_resume(adev);
5164
5165         gfx_v8_0_cp_test_all_rings(adev);
5166
5167         adev->gfx.rlc.funcs->start(adev);
5168
5169         return 0;
5170 }
5171
5172 /**
5173  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5174  *
5175  * @adev: amdgpu_device pointer
5176  *
5177  * Fetches a GPU clock counter snapshot.
5178  * Returns the 64 bit clock counter snapshot.
5179  */
5180 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5181 {
5182         uint64_t clock;
5183
5184         mutex_lock(&adev->gfx.gpu_clock_mutex);
5185         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5186         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5187                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5188         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5189         return clock;
5190 }
5191
5192 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5193                                           uint32_t vmid,
5194                                           uint32_t gds_base, uint32_t gds_size,
5195                                           uint32_t gws_base, uint32_t gws_size,
5196                                           uint32_t oa_base, uint32_t oa_size)
5197 {
5198         /* GDS Base */
5199         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5200         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5201                                 WRITE_DATA_DST_SEL(0)));
5202         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5203         amdgpu_ring_write(ring, 0);
5204         amdgpu_ring_write(ring, gds_base);
5205
5206         /* GDS Size */
5207         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5208         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5209                                 WRITE_DATA_DST_SEL(0)));
5210         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5211         amdgpu_ring_write(ring, 0);
5212         amdgpu_ring_write(ring, gds_size);
5213
5214         /* GWS */
5215         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5216         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5217                                 WRITE_DATA_DST_SEL(0)));
5218         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5219         amdgpu_ring_write(ring, 0);
5220         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5221
5222         /* OA */
5223         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5224         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5225                                 WRITE_DATA_DST_SEL(0)));
5226         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5227         amdgpu_ring_write(ring, 0);
5228         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5229 }
5230
5231 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5232 {
5233         WREG32(mmSQ_IND_INDEX,
5234                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5235                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5236                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5237                 (SQ_IND_INDEX__FORCE_READ_MASK));
5238         return RREG32(mmSQ_IND_DATA);
5239 }
5240
5241 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5242                            uint32_t wave, uint32_t thread,
5243                            uint32_t regno, uint32_t num, uint32_t *out)
5244 {
5245         WREG32(mmSQ_IND_INDEX,
5246                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5247                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5248                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5249                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5250                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5251                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5252         while (num--)
5253                 *(out++) = RREG32(mmSQ_IND_DATA);
5254 }
5255
5256 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5257 {
5258         /* type 0 wave data */
5259         dst[(*no_fields)++] = 0;
5260         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5261         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5262         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5263         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5264         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5265         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5266         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5267         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5268         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5269         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5270         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5271         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5272         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5273         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5274         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5275         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5276         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5277         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5278 }
5279
5280 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5281                                      uint32_t wave, uint32_t start,
5282                                      uint32_t size, uint32_t *dst)
5283 {
5284         wave_read_regs(
5285                 adev, simd, wave, 0,
5286                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5287 }
5288
5289
5290 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5291         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5292         .select_se_sh = &gfx_v8_0_select_se_sh,
5293         .read_wave_data = &gfx_v8_0_read_wave_data,
5294         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5295         .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5296 };
5297
5298 static int gfx_v8_0_early_init(void *handle)
5299 {
5300         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5301
5302         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5303         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5304         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5305         gfx_v8_0_set_ring_funcs(adev);
5306         gfx_v8_0_set_irq_funcs(adev);
5307         gfx_v8_0_set_gds_init(adev);
5308         gfx_v8_0_set_rlc_funcs(adev);
5309
5310         return 0;
5311 }
5312
5313 static int gfx_v8_0_late_init(void *handle)
5314 {
5315         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5316         int r;
5317
5318         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5319         if (r)
5320                 return r;
5321
5322         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5323         if (r)
5324                 return r;
5325
5326         /* requires IBs so do in late init after IB pool is initialized */
5327         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5328         if (r)
5329                 return r;
5330
5331         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5332         if (r) {
5333                 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5334                 return r;
5335         }
5336
5337         r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5338         if (r) {
5339                 DRM_ERROR(
5340                         "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5341                         r);
5342                 return r;
5343         }
5344
5345         return 0;
5346 }
5347
5348 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5349                                                        bool enable)
5350 {
5351         if (((adev->asic_type == CHIP_POLARIS11) ||
5352             (adev->asic_type == CHIP_POLARIS12) ||
5353             (adev->asic_type == CHIP_VEGAM)) &&
5354             adev->powerplay.pp_funcs->set_powergating_by_smu)
5355                 /* Send msg to SMU via Powerplay */
5356                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5357
5358         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5359 }
5360
5361 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5362                                                         bool enable)
5363 {
5364         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5365 }
5366
5367 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5368                 bool enable)
5369 {
5370         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5371 }
5372
5373 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5374                                           bool enable)
5375 {
5376         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5377 }
5378
5379 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5380                                                 bool enable)
5381 {
5382         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5383
5384         /* Read any GFX register to wake up GFX. */
5385         if (!enable)
5386                 RREG32(mmDB_RENDER_CONTROL);
5387 }
5388
5389 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5390                                           bool enable)
5391 {
5392         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5393                 cz_enable_gfx_cg_power_gating(adev, true);
5394                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5395                         cz_enable_gfx_pipeline_power_gating(adev, true);
5396         } else {
5397                 cz_enable_gfx_cg_power_gating(adev, false);
5398                 cz_enable_gfx_pipeline_power_gating(adev, false);
5399         }
5400 }
5401
5402 static int gfx_v8_0_set_powergating_state(void *handle,
5403                                           enum amd_powergating_state state)
5404 {
5405         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5406         bool enable = (state == AMD_PG_STATE_GATE);
5407
5408         if (amdgpu_sriov_vf(adev))
5409                 return 0;
5410
5411         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5412                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5413                                 AMD_PG_SUPPORT_CP |
5414                                 AMD_PG_SUPPORT_GFX_DMG))
5415                 amdgpu_gfx_rlc_enter_safe_mode(adev);
5416         switch (adev->asic_type) {
5417         case CHIP_CARRIZO:
5418         case CHIP_STONEY:
5419
5420                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5421                         cz_enable_sck_slow_down_on_power_up(adev, true);
5422                         cz_enable_sck_slow_down_on_power_down(adev, true);
5423                 } else {
5424                         cz_enable_sck_slow_down_on_power_up(adev, false);
5425                         cz_enable_sck_slow_down_on_power_down(adev, false);
5426                 }
5427                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5428                         cz_enable_cp_power_gating(adev, true);
5429                 else
5430                         cz_enable_cp_power_gating(adev, false);
5431
5432                 cz_update_gfx_cg_power_gating(adev, enable);
5433
5434                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5435                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5436                 else
5437                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5438
5439                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5440                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5441                 else
5442                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5443                 break;
5444         case CHIP_POLARIS11:
5445         case CHIP_POLARIS12:
5446         case CHIP_VEGAM:
5447                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5448                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5449                 else
5450                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5451
5452                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5453                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5454                 else
5455                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5456
5457                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5458                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5459                 else
5460                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5461                 break;
5462         default:
5463                 break;
5464         }
5465         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5466                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5467                                 AMD_PG_SUPPORT_CP |
5468                                 AMD_PG_SUPPORT_GFX_DMG))
5469                 amdgpu_gfx_rlc_exit_safe_mode(adev);
5470         return 0;
5471 }
5472
5473 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5474 {
5475         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5476         int data;
5477
5478         if (amdgpu_sriov_vf(adev))
5479                 *flags = 0;
5480
5481         /* AMD_CG_SUPPORT_GFX_MGCG */
5482         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5483         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5484                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5485
5486         /* AMD_CG_SUPPORT_GFX_CGLG */
5487         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5488         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5489                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5490
5491         /* AMD_CG_SUPPORT_GFX_CGLS */
5492         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5493                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5494
5495         /* AMD_CG_SUPPORT_GFX_CGTS */
5496         data = RREG32(mmCGTS_SM_CTRL_REG);
5497         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5498                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5499
5500         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5501         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5502                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5503
5504         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5505         data = RREG32(mmRLC_MEM_SLP_CNTL);
5506         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5507                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5508
5509         /* AMD_CG_SUPPORT_GFX_CP_LS */
5510         data = RREG32(mmCP_MEM_SLP_CNTL);
5511         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5512                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5513 }
5514
5515 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5516                                      uint32_t reg_addr, uint32_t cmd)
5517 {
5518         uint32_t data;
5519
5520         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5521
5522         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5523         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5524
5525         data = RREG32(mmRLC_SERDES_WR_CTRL);
5526         if (adev->asic_type == CHIP_STONEY)
5527                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5528                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5529                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5530                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5531                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5532                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5533                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5534                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5535                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5536         else
5537                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5538                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5539                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5540                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5541                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5542                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5543                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5544                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5545                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5546                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5547                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5548         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5549                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5550                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5551                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5552
5553         WREG32(mmRLC_SERDES_WR_CTRL, data);
5554 }
5555
5556 #define MSG_ENTER_RLC_SAFE_MODE     1
5557 #define MSG_EXIT_RLC_SAFE_MODE      0
5558 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5559 #define RLC_GPR_REG2__REQ__SHIFT 0
5560 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5561 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5562
5563 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5564 {
5565         uint32_t rlc_setting;
5566
5567         rlc_setting = RREG32(mmRLC_CNTL);
5568         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5569                 return false;
5570
5571         return true;
5572 }
5573
5574 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5575 {
5576         uint32_t data;
5577         unsigned i;
5578         data = RREG32(mmRLC_CNTL);
5579         data |= RLC_SAFE_MODE__CMD_MASK;
5580         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5581         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5582         WREG32(mmRLC_SAFE_MODE, data);
5583
5584         /* wait for RLC_SAFE_MODE */
5585         for (i = 0; i < adev->usec_timeout; i++) {
5586                 if ((RREG32(mmRLC_GPM_STAT) &
5587                      (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5588                       RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5589                     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5590                      RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5591                         break;
5592                 udelay(1);
5593         }
5594         for (i = 0; i < adev->usec_timeout; i++) {
5595                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5596                         break;
5597                 udelay(1);
5598         }
5599 }
5600
5601 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5602 {
5603         uint32_t data;
5604         unsigned i;
5605
5606         data = RREG32(mmRLC_CNTL);
5607         data |= RLC_SAFE_MODE__CMD_MASK;
5608         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5609         WREG32(mmRLC_SAFE_MODE, data);
5610
5611         for (i = 0; i < adev->usec_timeout; i++) {
5612                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5613                         break;
5614                 udelay(1);
5615         }
5616 }
5617
5618 static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5619 {
5620         u32 data;
5621
5622         data = RREG32(mmRLC_SPM_VMID);
5623
5624         data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5625         data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5626
5627         WREG32(mmRLC_SPM_VMID, data);
5628 }
5629
5630 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5631         .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5632         .set_safe_mode = gfx_v8_0_set_safe_mode,
5633         .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5634         .init = gfx_v8_0_rlc_init,
5635         .get_csb_size = gfx_v8_0_get_csb_size,
5636         .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5637         .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5638         .resume = gfx_v8_0_rlc_resume,
5639         .stop = gfx_v8_0_rlc_stop,
5640         .reset = gfx_v8_0_rlc_reset,
5641         .start = gfx_v8_0_rlc_start,
5642         .update_spm_vmid = gfx_v8_0_update_spm_vmid
5643 };
5644
5645 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5646                                                       bool enable)
5647 {
5648         uint32_t temp, data;
5649
5650         amdgpu_gfx_rlc_enter_safe_mode(adev);
5651
5652         /* It is disabled by HW by default */
5653         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5654                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5655                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5656                                 /* 1 - RLC memory Light sleep */
5657                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5658
5659                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5660                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5661                 }
5662
5663                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5664                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5665                 if (adev->flags & AMD_IS_APU)
5666                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5667                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5668                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5669                 else
5670                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5671                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5672                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5673                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5674
5675                 if (temp != data)
5676                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5677
5678                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5679                 gfx_v8_0_wait_for_rlc_serdes(adev);
5680
5681                 /* 5 - clear mgcg override */
5682                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5683
5684                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5685                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5686                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5687                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5688                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5689                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5690                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5691                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5692                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5693                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5694                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5695                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5696                         if (temp != data)
5697                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5698                 }
5699                 udelay(50);
5700
5701                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5702                 gfx_v8_0_wait_for_rlc_serdes(adev);
5703         } else {
5704                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5705                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5706                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5707                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5708                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5709                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5710                 if (temp != data)
5711                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5712
5713                 /* 2 - disable MGLS in RLC */
5714                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5715                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5716                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5717                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5718                 }
5719
5720                 /* 3 - disable MGLS in CP */
5721                 data = RREG32(mmCP_MEM_SLP_CNTL);
5722                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5723                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5724                         WREG32(mmCP_MEM_SLP_CNTL, data);
5725                 }
5726
5727                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5728                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5729                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5730                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5731                 if (temp != data)
5732                         WREG32(mmCGTS_SM_CTRL_REG, data);
5733
5734                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5735                 gfx_v8_0_wait_for_rlc_serdes(adev);
5736
5737                 /* 6 - set mgcg override */
5738                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5739
5740                 udelay(50);
5741
5742                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5743                 gfx_v8_0_wait_for_rlc_serdes(adev);
5744         }
5745
5746         amdgpu_gfx_rlc_exit_safe_mode(adev);
5747 }
5748
5749 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5750                                                       bool enable)
5751 {
5752         uint32_t temp, temp1, data, data1;
5753
5754         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5755
5756         amdgpu_gfx_rlc_enter_safe_mode(adev);
5757
5758         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5759                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5760                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5761                 if (temp1 != data1)
5762                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5763
5764                 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5765                 gfx_v8_0_wait_for_rlc_serdes(adev);
5766
5767                 /* 2 - clear cgcg override */
5768                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5769
5770                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5771                 gfx_v8_0_wait_for_rlc_serdes(adev);
5772
5773                 /* 3 - write cmd to set CGLS */
5774                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5775
5776                 /* 4 - enable cgcg */
5777                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5778
5779                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5780                         /* enable cgls*/
5781                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5782
5783                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5784                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5785
5786                         if (temp1 != data1)
5787                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5788                 } else {
5789                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5790                 }
5791
5792                 if (temp != data)
5793                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5794
5795                 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5796                  * Cmp_busy/GFX_Idle interrupts
5797                  */
5798                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5799         } else {
5800                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5801                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5802
5803                 /* TEST CGCG */
5804                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5805                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5806                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5807                 if (temp1 != data1)
5808                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5809
5810                 /* read gfx register to wake up cgcg */
5811                 RREG32(mmCB_CGTT_SCLK_CTRL);
5812                 RREG32(mmCB_CGTT_SCLK_CTRL);
5813                 RREG32(mmCB_CGTT_SCLK_CTRL);
5814                 RREG32(mmCB_CGTT_SCLK_CTRL);
5815
5816                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5817                 gfx_v8_0_wait_for_rlc_serdes(adev);
5818
5819                 /* write cmd to Set CGCG Overrride */
5820                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5821
5822                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5823                 gfx_v8_0_wait_for_rlc_serdes(adev);
5824
5825                 /* write cmd to Clear CGLS */
5826                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5827
5828                 /* disable cgcg, cgls should be disabled too. */
5829                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5830                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5831                 if (temp != data)
5832                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5833                 /* enable interrupts again for PG */
5834                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5835         }
5836
5837         gfx_v8_0_wait_for_rlc_serdes(adev);
5838
5839         amdgpu_gfx_rlc_exit_safe_mode(adev);
5840 }
5841 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5842                                             bool enable)
5843 {
5844         if (enable) {
5845                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5846                  * ===  MGCG + MGLS + TS(CG/LS) ===
5847                  */
5848                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5849                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5850         } else {
5851                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5852                  * ===  CGCG + CGLS ===
5853                  */
5854                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5855                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5856         }
5857         return 0;
5858 }
5859
5860 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5861                                           enum amd_clockgating_state state)
5862 {
5863         uint32_t msg_id, pp_state = 0;
5864         uint32_t pp_support_state = 0;
5865
5866         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5867                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5868                         pp_support_state = PP_STATE_SUPPORT_LS;
5869                         pp_state = PP_STATE_LS;
5870                 }
5871                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5872                         pp_support_state |= PP_STATE_SUPPORT_CG;
5873                         pp_state |= PP_STATE_CG;
5874                 }
5875                 if (state == AMD_CG_STATE_UNGATE)
5876                         pp_state = 0;
5877
5878                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5879                                 PP_BLOCK_GFX_CG,
5880                                 pp_support_state,
5881                                 pp_state);
5882                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5883                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5884         }
5885
5886         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5887                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5888                         pp_support_state = PP_STATE_SUPPORT_LS;
5889                         pp_state = PP_STATE_LS;
5890                 }
5891
5892                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5893                         pp_support_state |= PP_STATE_SUPPORT_CG;
5894                         pp_state |= PP_STATE_CG;
5895                 }
5896
5897                 if (state == AMD_CG_STATE_UNGATE)
5898                         pp_state = 0;
5899
5900                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5901                                 PP_BLOCK_GFX_MG,
5902                                 pp_support_state,
5903                                 pp_state);
5904                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5905                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5906         }
5907
5908         return 0;
5909 }
5910
5911 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5912                                           enum amd_clockgating_state state)
5913 {
5914
5915         uint32_t msg_id, pp_state = 0;
5916         uint32_t pp_support_state = 0;
5917
5918         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5919                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5920                         pp_support_state = PP_STATE_SUPPORT_LS;
5921                         pp_state = PP_STATE_LS;
5922                 }
5923                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5924                         pp_support_state |= PP_STATE_SUPPORT_CG;
5925                         pp_state |= PP_STATE_CG;
5926                 }
5927                 if (state == AMD_CG_STATE_UNGATE)
5928                         pp_state = 0;
5929
5930                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5931                                 PP_BLOCK_GFX_CG,
5932                                 pp_support_state,
5933                                 pp_state);
5934                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5935                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5936         }
5937
5938         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5939                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5940                         pp_support_state = PP_STATE_SUPPORT_LS;
5941                         pp_state = PP_STATE_LS;
5942                 }
5943                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5944                         pp_support_state |= PP_STATE_SUPPORT_CG;
5945                         pp_state |= PP_STATE_CG;
5946                 }
5947                 if (state == AMD_CG_STATE_UNGATE)
5948                         pp_state = 0;
5949
5950                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5951                                 PP_BLOCK_GFX_3D,
5952                                 pp_support_state,
5953                                 pp_state);
5954                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5955                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5956         }
5957
5958         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5959                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5960                         pp_support_state = PP_STATE_SUPPORT_LS;
5961                         pp_state = PP_STATE_LS;
5962                 }
5963
5964                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5965                         pp_support_state |= PP_STATE_SUPPORT_CG;
5966                         pp_state |= PP_STATE_CG;
5967                 }
5968
5969                 if (state == AMD_CG_STATE_UNGATE)
5970                         pp_state = 0;
5971
5972                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5973                                 PP_BLOCK_GFX_MG,
5974                                 pp_support_state,
5975                                 pp_state);
5976                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5977                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5978         }
5979
5980         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5981                 pp_support_state = PP_STATE_SUPPORT_LS;
5982
5983                 if (state == AMD_CG_STATE_UNGATE)
5984                         pp_state = 0;
5985                 else
5986                         pp_state = PP_STATE_LS;
5987
5988                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5989                                 PP_BLOCK_GFX_RLC,
5990                                 pp_support_state,
5991                                 pp_state);
5992                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5993                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5994         }
5995
5996         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5997                 pp_support_state = PP_STATE_SUPPORT_LS;
5998
5999                 if (state == AMD_CG_STATE_UNGATE)
6000                         pp_state = 0;
6001                 else
6002                         pp_state = PP_STATE_LS;
6003                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6004                         PP_BLOCK_GFX_CP,
6005                         pp_support_state,
6006                         pp_state);
6007                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6008                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6009         }
6010
6011         return 0;
6012 }
6013
6014 static int gfx_v8_0_set_clockgating_state(void *handle,
6015                                           enum amd_clockgating_state state)
6016 {
6017         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6018
6019         if (amdgpu_sriov_vf(adev))
6020                 return 0;
6021
6022         switch (adev->asic_type) {
6023         case CHIP_FIJI:
6024         case CHIP_CARRIZO:
6025         case CHIP_STONEY:
6026                 gfx_v8_0_update_gfx_clock_gating(adev,
6027                                                  state == AMD_CG_STATE_GATE);
6028                 break;
6029         case CHIP_TONGA:
6030                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6031                 break;
6032         case CHIP_POLARIS10:
6033         case CHIP_POLARIS11:
6034         case CHIP_POLARIS12:
6035         case CHIP_VEGAM:
6036                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6037                 break;
6038         default:
6039                 break;
6040         }
6041         return 0;
6042 }
6043
6044 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6045 {
6046         return ring->adev->wb.wb[ring->rptr_offs];
6047 }
6048
6049 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6050 {
6051         struct amdgpu_device *adev = ring->adev;
6052
6053         if (ring->use_doorbell)
6054                 /* XXX check if swapping is necessary on BE */
6055                 return ring->adev->wb.wb[ring->wptr_offs];
6056         else
6057                 return RREG32(mmCP_RB0_WPTR);
6058 }
6059
6060 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6061 {
6062         struct amdgpu_device *adev = ring->adev;
6063
6064         if (ring->use_doorbell) {
6065                 /* XXX check if swapping is necessary on BE */
6066                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6067                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6068         } else {
6069                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6070                 (void)RREG32(mmCP_RB0_WPTR);
6071         }
6072 }
6073
6074 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6075 {
6076         u32 ref_and_mask, reg_mem_engine;
6077
6078         if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6079             (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6080                 switch (ring->me) {
6081                 case 1:
6082                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6083                         break;
6084                 case 2:
6085                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6086                         break;
6087                 default:
6088                         return;
6089                 }
6090                 reg_mem_engine = 0;
6091         } else {
6092                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6093                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6094         }
6095
6096         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6097         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6098                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6099                                  reg_mem_engine));
6100         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6101         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6102         amdgpu_ring_write(ring, ref_and_mask);
6103         amdgpu_ring_write(ring, ref_and_mask);
6104         amdgpu_ring_write(ring, 0x20); /* poll interval */
6105 }
6106
6107 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6108 {
6109         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6110         amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6111                 EVENT_INDEX(4));
6112
6113         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6114         amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6115                 EVENT_INDEX(0));
6116 }
6117
6118 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6119                                         struct amdgpu_job *job,
6120                                         struct amdgpu_ib *ib,
6121                                         uint32_t flags)
6122 {
6123         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6124         u32 header, control = 0;
6125
6126         if (ib->flags & AMDGPU_IB_FLAG_CE)
6127                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6128         else
6129                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6130
6131         control |= ib->length_dw | (vmid << 24);
6132
6133         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6134                 control |= INDIRECT_BUFFER_PRE_ENB(1);
6135
6136                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6137                         gfx_v8_0_ring_emit_de_meta(ring);
6138         }
6139
6140         amdgpu_ring_write(ring, header);
6141         amdgpu_ring_write(ring,
6142 #ifdef __BIG_ENDIAN
6143                           (2 << 0) |
6144 #endif
6145                           (ib->gpu_addr & 0xFFFFFFFC));
6146         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6147         amdgpu_ring_write(ring, control);
6148 }
6149
6150 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6151                                           struct amdgpu_job *job,
6152                                           struct amdgpu_ib *ib,
6153                                           uint32_t flags)
6154 {
6155         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6156         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6157
6158         /* Currently, there is a high possibility to get wave ID mismatch
6159          * between ME and GDS, leading to a hw deadlock, because ME generates
6160          * different wave IDs than the GDS expects. This situation happens
6161          * randomly when at least 5 compute pipes use GDS ordered append.
6162          * The wave IDs generated by ME are also wrong after suspend/resume.
6163          * Those are probably bugs somewhere else in the kernel driver.
6164          *
6165          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6166          * GDS to 0 for this ring (me/pipe).
6167          */
6168         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6169                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6170                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6171                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6172         }
6173
6174         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6175         amdgpu_ring_write(ring,
6176 #ifdef __BIG_ENDIAN
6177                                 (2 << 0) |
6178 #endif
6179                                 (ib->gpu_addr & 0xFFFFFFFC));
6180         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6181         amdgpu_ring_write(ring, control);
6182 }
6183
6184 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6185                                          u64 seq, unsigned flags)
6186 {
6187         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6188         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6189
6190         /* Workaround for cache flush problems. First send a dummy EOP
6191          * event down the pipe with seq one below.
6192          */
6193         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6194         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6195                                  EOP_TC_ACTION_EN |
6196                                  EOP_TC_WB_ACTION_EN |
6197                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6198                                  EVENT_INDEX(5)));
6199         amdgpu_ring_write(ring, addr & 0xfffffffc);
6200         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6201                                 DATA_SEL(1) | INT_SEL(0));
6202         amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6203         amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6204
6205         /* Then send the real EOP event down the pipe:
6206          * EVENT_WRITE_EOP - flush caches, send int */
6207         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6208         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6209                                  EOP_TC_ACTION_EN |
6210                                  EOP_TC_WB_ACTION_EN |
6211                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6212                                  EVENT_INDEX(5)));
6213         amdgpu_ring_write(ring, addr & 0xfffffffc);
6214         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6215                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6216         amdgpu_ring_write(ring, lower_32_bits(seq));
6217         amdgpu_ring_write(ring, upper_32_bits(seq));
6218
6219 }
6220
6221 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6222 {
6223         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6224         uint32_t seq = ring->fence_drv.sync_seq;
6225         uint64_t addr = ring->fence_drv.gpu_addr;
6226
6227         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6228         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6229                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6230                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6231         amdgpu_ring_write(ring, addr & 0xfffffffc);
6232         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6233         amdgpu_ring_write(ring, seq);
6234         amdgpu_ring_write(ring, 0xffffffff);
6235         amdgpu_ring_write(ring, 4); /* poll interval */
6236 }
6237
6238 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6239                                         unsigned vmid, uint64_t pd_addr)
6240 {
6241         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6242
6243         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6244
6245         /* wait for the invalidate to complete */
6246         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6247         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6248                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6249                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6250         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6251         amdgpu_ring_write(ring, 0);
6252         amdgpu_ring_write(ring, 0); /* ref */
6253         amdgpu_ring_write(ring, 0); /* mask */
6254         amdgpu_ring_write(ring, 0x20); /* poll interval */
6255
6256         /* compute doesn't have PFP */
6257         if (usepfp) {
6258                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6259                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6260                 amdgpu_ring_write(ring, 0x0);
6261         }
6262 }
6263
6264 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6265 {
6266         return ring->adev->wb.wb[ring->wptr_offs];
6267 }
6268
6269 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6270 {
6271         struct amdgpu_device *adev = ring->adev;
6272
6273         /* XXX check if swapping is necessary on BE */
6274         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6275         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6276 }
6277
6278 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6279                                              u64 addr, u64 seq,
6280                                              unsigned flags)
6281 {
6282         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6283         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6284
6285         /* RELEASE_MEM - flush caches, send int */
6286         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6287         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6288                                  EOP_TC_ACTION_EN |
6289                                  EOP_TC_WB_ACTION_EN |
6290                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6291                                  EVENT_INDEX(5)));
6292         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6293         amdgpu_ring_write(ring, addr & 0xfffffffc);
6294         amdgpu_ring_write(ring, upper_32_bits(addr));
6295         amdgpu_ring_write(ring, lower_32_bits(seq));
6296         amdgpu_ring_write(ring, upper_32_bits(seq));
6297 }
6298
6299 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6300                                          u64 seq, unsigned int flags)
6301 {
6302         /* we only allocate 32bit for each seq wb address */
6303         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6304
6305         /* write fence seq to the "addr" */
6306         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6307         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6308                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6309         amdgpu_ring_write(ring, lower_32_bits(addr));
6310         amdgpu_ring_write(ring, upper_32_bits(addr));
6311         amdgpu_ring_write(ring, lower_32_bits(seq));
6312
6313         if (flags & AMDGPU_FENCE_FLAG_INT) {
6314                 /* set register to trigger INT */
6315                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6316                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6317                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6318                 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6319                 amdgpu_ring_write(ring, 0);
6320                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6321         }
6322 }
6323
6324 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6325 {
6326         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6327         amdgpu_ring_write(ring, 0);
6328 }
6329
6330 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6331 {
6332         uint32_t dw2 = 0;
6333
6334         if (amdgpu_sriov_vf(ring->adev))
6335                 gfx_v8_0_ring_emit_ce_meta(ring);
6336
6337         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6338         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6339                 gfx_v8_0_ring_emit_vgt_flush(ring);
6340                 /* set load_global_config & load_global_uconfig */
6341                 dw2 |= 0x8001;
6342                 /* set load_cs_sh_regs */
6343                 dw2 |= 0x01000000;
6344                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6345                 dw2 |= 0x10002;
6346
6347                 /* set load_ce_ram if preamble presented */
6348                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6349                         dw2 |= 0x10000000;
6350         } else {
6351                 /* still load_ce_ram if this is the first time preamble presented
6352                  * although there is no context switch happens.
6353                  */
6354                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6355                         dw2 |= 0x10000000;
6356         }
6357
6358         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6359         amdgpu_ring_write(ring, dw2);
6360         amdgpu_ring_write(ring, 0);
6361 }
6362
6363 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6364 {
6365         unsigned ret;
6366
6367         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6368         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6369         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6370         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6371         ret = ring->wptr & ring->buf_mask;
6372         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6373         return ret;
6374 }
6375
6376 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6377 {
6378         unsigned cur;
6379
6380         BUG_ON(offset > ring->buf_mask);
6381         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6382
6383         cur = (ring->wptr & ring->buf_mask) - 1;
6384         if (likely(cur > offset))
6385                 ring->ring[offset] = cur - offset;
6386         else
6387                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6388 }
6389
6390 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6391 {
6392         struct amdgpu_device *adev = ring->adev;
6393         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
6394
6395         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6396         amdgpu_ring_write(ring, 0 |     /* src: register*/
6397                                 (5 << 8) |      /* dst: memory */
6398                                 (1 << 20));     /* write confirm */
6399         amdgpu_ring_write(ring, reg);
6400         amdgpu_ring_write(ring, 0);
6401         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6402                                 kiq->reg_val_offs * 4));
6403         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6404                                 kiq->reg_val_offs * 4));
6405 }
6406
6407 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6408                                   uint32_t val)
6409 {
6410         uint32_t cmd;
6411
6412         switch (ring->funcs->type) {
6413         case AMDGPU_RING_TYPE_GFX:
6414                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6415                 break;
6416         case AMDGPU_RING_TYPE_KIQ:
6417                 cmd = 1 << 16; /* no inc addr */
6418                 break;
6419         default:
6420                 cmd = WR_CONFIRM;
6421                 break;
6422         }
6423
6424         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6425         amdgpu_ring_write(ring, cmd);
6426         amdgpu_ring_write(ring, reg);
6427         amdgpu_ring_write(ring, 0);
6428         amdgpu_ring_write(ring, val);
6429 }
6430
6431 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6432 {
6433         struct amdgpu_device *adev = ring->adev;
6434         uint32_t value = 0;
6435
6436         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6437         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6438         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6439         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6440         WREG32(mmSQ_CMD, value);
6441 }
6442
6443 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6444                                                  enum amdgpu_interrupt_state state)
6445 {
6446         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6447                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6448 }
6449
6450 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6451                                                      int me, int pipe,
6452                                                      enum amdgpu_interrupt_state state)
6453 {
6454         u32 mec_int_cntl, mec_int_cntl_reg;
6455
6456         /*
6457          * amdgpu controls only the first MEC. That's why this function only
6458          * handles the setting of interrupts for this specific MEC. All other
6459          * pipes' interrupts are set by amdkfd.
6460          */
6461
6462         if (me == 1) {
6463                 switch (pipe) {
6464                 case 0:
6465                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6466                         break;
6467                 case 1:
6468                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6469                         break;
6470                 case 2:
6471                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6472                         break;
6473                 case 3:
6474                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6475                         break;
6476                 default:
6477                         DRM_DEBUG("invalid pipe %d\n", pipe);
6478                         return;
6479                 }
6480         } else {
6481                 DRM_DEBUG("invalid me %d\n", me);
6482                 return;
6483         }
6484
6485         switch (state) {
6486         case AMDGPU_IRQ_STATE_DISABLE:
6487                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6488                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6489                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6490                 break;
6491         case AMDGPU_IRQ_STATE_ENABLE:
6492                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6493                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6494                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6495                 break;
6496         default:
6497                 break;
6498         }
6499 }
6500
6501 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6502                                              struct amdgpu_irq_src *source,
6503                                              unsigned type,
6504                                              enum amdgpu_interrupt_state state)
6505 {
6506         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6507                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6508
6509         return 0;
6510 }
6511
6512 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6513                                               struct amdgpu_irq_src *source,
6514                                               unsigned type,
6515                                               enum amdgpu_interrupt_state state)
6516 {
6517         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6518                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6519
6520         return 0;
6521 }
6522
6523 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6524                                             struct amdgpu_irq_src *src,
6525                                             unsigned type,
6526                                             enum amdgpu_interrupt_state state)
6527 {
6528         switch (type) {
6529         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6530                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6531                 break;
6532         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6533                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6534                 break;
6535         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6536                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6537                 break;
6538         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6539                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6540                 break;
6541         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6542                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6543                 break;
6544         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6545                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6546                 break;
6547         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6548                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6549                 break;
6550         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6551                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6552                 break;
6553         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6554                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6555                 break;
6556         default:
6557                 break;
6558         }
6559         return 0;
6560 }
6561
6562 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6563                                          struct amdgpu_irq_src *source,
6564                                          unsigned int type,
6565                                          enum amdgpu_interrupt_state state)
6566 {
6567         int enable_flag;
6568
6569         switch (state) {
6570         case AMDGPU_IRQ_STATE_DISABLE:
6571                 enable_flag = 0;
6572                 break;
6573
6574         case AMDGPU_IRQ_STATE_ENABLE:
6575                 enable_flag = 1;
6576                 break;
6577
6578         default:
6579                 return -EINVAL;
6580         }
6581
6582         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6583         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6584         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6585         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6586         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6587         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6588                      enable_flag);
6589         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6590                      enable_flag);
6591         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6592                      enable_flag);
6593         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6594                      enable_flag);
6595         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6596                      enable_flag);
6597         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6598                      enable_flag);
6599         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6600                      enable_flag);
6601         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6602                      enable_flag);
6603
6604         return 0;
6605 }
6606
6607 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6608                                      struct amdgpu_irq_src *source,
6609                                      unsigned int type,
6610                                      enum amdgpu_interrupt_state state)
6611 {
6612         int enable_flag;
6613
6614         switch (state) {
6615         case AMDGPU_IRQ_STATE_DISABLE:
6616                 enable_flag = 1;
6617                 break;
6618
6619         case AMDGPU_IRQ_STATE_ENABLE:
6620                 enable_flag = 0;
6621                 break;
6622
6623         default:
6624                 return -EINVAL;
6625         }
6626
6627         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6628                      enable_flag);
6629
6630         return 0;
6631 }
6632
6633 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6634                             struct amdgpu_irq_src *source,
6635                             struct amdgpu_iv_entry *entry)
6636 {
6637         int i;
6638         u8 me_id, pipe_id, queue_id;
6639         struct amdgpu_ring *ring;
6640
6641         DRM_DEBUG("IH: CP EOP\n");
6642         me_id = (entry->ring_id & 0x0c) >> 2;
6643         pipe_id = (entry->ring_id & 0x03) >> 0;
6644         queue_id = (entry->ring_id & 0x70) >> 4;
6645
6646         switch (me_id) {
6647         case 0:
6648                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6649                 break;
6650         case 1:
6651         case 2:
6652                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6653                         ring = &adev->gfx.compute_ring[i];
6654                         /* Per-queue interrupt is supported for MEC starting from VI.
6655                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6656                           */
6657                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6658                                 amdgpu_fence_process(ring);
6659                 }
6660                 break;
6661         }
6662         return 0;
6663 }
6664
6665 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6666                            struct amdgpu_iv_entry *entry)
6667 {
6668         u8 me_id, pipe_id, queue_id;
6669         struct amdgpu_ring *ring;
6670         int i;
6671
6672         me_id = (entry->ring_id & 0x0c) >> 2;
6673         pipe_id = (entry->ring_id & 0x03) >> 0;
6674         queue_id = (entry->ring_id & 0x70) >> 4;
6675
6676         switch (me_id) {
6677         case 0:
6678                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6679                 break;
6680         case 1:
6681         case 2:
6682                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6683                         ring = &adev->gfx.compute_ring[i];
6684                         if (ring->me == me_id && ring->pipe == pipe_id &&
6685                             ring->queue == queue_id)
6686                                 drm_sched_fault(&ring->sched);
6687                 }
6688                 break;
6689         }
6690 }
6691
/*
 * Privileged register fault interrupt handler: log the error, then hand
 * @entry to gfx_v8_0_fault(). @source is not used. Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	/* Let the scheduler of the faulting ring know. */
	gfx_v8_0_fault(adev, entry);

	return 0;
}
6700
/*
 * Privileged instruction fault interrupt handler: log the error, then hand
 * @entry to gfx_v8_0_fault(). @source is not used. Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	/* Let the scheduler of the faulting ring know. */
	gfx_v8_0_fault(adev, entry);

	return 0;
}
6709
/*
 * CP EDC/ECC error interrupt handler: only logs the event.
 *
 * @adev, @source and @entry are not used. Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* Terminate the message with a newline so the log line is complete. */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6717
6718 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6719 {
6720         u32 enc, se_id, sh_id, cu_id;
6721         char type[20];
6722         int sq_edc_source = -1;
6723
6724         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6725         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6726
6727         switch (enc) {
6728                 case 0:
6729                         DRM_INFO("SQ general purpose intr detected:"
6730                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6731                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6732                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6733                                         "wlt %d, thread_trace %d.\n",
6734                                         se_id,
6735                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6736                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6737                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6738                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6739                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6740                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6741                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6742                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6743                                         );
6744                         break;
6745                 case 1:
6746                 case 2:
6747
6748                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6749                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6750
6751                         /*
6752                          * This function can be called either directly from ISR
6753                          * or from BH in which case we can access SQ_EDC_INFO
6754                          * instance
6755                          */
6756                         if (in_task()) {
6757                                 mutex_lock(&adev->grbm_idx_mutex);
6758                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6759
6760                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6761
6762                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6763                                 mutex_unlock(&adev->grbm_idx_mutex);
6764                         }
6765
6766                         if (enc == 1)
6767                                 sprintf(type, "instruction intr");
6768                         else
6769                                 sprintf(type, "EDC/ECC error");
6770
6771                         DRM_INFO(
6772                                 "SQ %s detected: "
6773                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6774                                         "trap %s, sq_ed_info.source %s.\n",
6775                                         type, se_id, sh_id, cu_id,
6776                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6777                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6778                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6779                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6780                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6781                                 );
6782                         break;
6783                 default:
6784                         DRM_ERROR("SQ invalid encoding type\n.");
6785         }
6786 }
6787
6788 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6789 {
6790
6791         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6792         struct sq_work *sq_work = container_of(work, struct sq_work, work);
6793
6794         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6795 }
6796
6797 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6798                            struct amdgpu_irq_src *source,
6799                            struct amdgpu_iv_entry *entry)
6800 {
6801         unsigned ih_data = entry->src_data[0];
6802
6803         /*
6804          * Try to submit work so SQ_EDC_INFO can be accessed from
6805          * BH. If previous work submission hasn't finished yet
6806          * just print whatever info is possible directly from the ISR.
6807          */
6808         if (work_pending(&adev->gfx.sq_work.work)) {
6809                 gfx_v8_0_parse_sq_irq(adev, ih_data);
6810         } else {
6811                 adev->gfx.sq_work.ih_data = ih_data;
6812                 schedule_work(&adev->gfx.sq_work.work);
6813         }
6814
6815         return 0;
6816 }
6817
6818 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6819         .name = "gfx_v8_0",
6820         .early_init = gfx_v8_0_early_init,
6821         .late_init = gfx_v8_0_late_init,
6822         .sw_init = gfx_v8_0_sw_init,
6823         .sw_fini = gfx_v8_0_sw_fini,
6824         .hw_init = gfx_v8_0_hw_init,
6825         .hw_fini = gfx_v8_0_hw_fini,
6826         .suspend = gfx_v8_0_suspend,
6827         .resume = gfx_v8_0_resume,
6828         .is_idle = gfx_v8_0_is_idle,
6829         .wait_for_idle = gfx_v8_0_wait_for_idle,
6830         .check_soft_reset = gfx_v8_0_check_soft_reset,
6831         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6832         .soft_reset = gfx_v8_0_soft_reset,
6833         .post_soft_reset = gfx_v8_0_post_soft_reset,
6834         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6835         .set_powergating_state = gfx_v8_0_set_powergating_state,
6836         .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6837 };
6838
6839 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6840         .type = AMDGPU_RING_TYPE_GFX,
6841         .align_mask = 0xff,
6842         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6843         .support_64bit_ptrs = false,
6844         .get_rptr = gfx_v8_0_ring_get_rptr,
6845         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6846         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6847         .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6848                 5 +  /* COND_EXEC */
6849                 7 +  /* PIPELINE_SYNC */
6850                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6851                 12 +  /* FENCE for VM_FLUSH */
6852                 20 + /* GDS switch */
6853                 4 + /* double SWITCH_BUFFER,
6854                        the first COND_EXEC jump to the place just
6855                            prior to this double SWITCH_BUFFER  */
6856                 5 + /* COND_EXEC */
6857                 7 +      /*     HDP_flush */
6858                 4 +      /*     VGT_flush */
6859                 14 + /* CE_META */
6860                 31 + /* DE_META */
6861                 3 + /* CNTX_CTRL */
6862                 5 + /* HDP_INVL */
6863                 12 + 12 + /* FENCE x2 */
6864                 2, /* SWITCH_BUFFER */
6865         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6866         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6867         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6868         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6869         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6870         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6871         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6872         .test_ring = gfx_v8_0_ring_test_ring,
6873         .test_ib = gfx_v8_0_ring_test_ib,
6874         .insert_nop = amdgpu_ring_insert_nop,
6875         .pad_ib = amdgpu_ring_generic_pad_ib,
6876         .emit_switch_buffer = gfx_v8_ring_emit_sb,
6877         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6878         .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6879         .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6880         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6881         .soft_recovery = gfx_v8_0_ring_soft_recovery,
6882 };
6883
6884 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6885         .type = AMDGPU_RING_TYPE_COMPUTE,
6886         .align_mask = 0xff,
6887         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6888         .support_64bit_ptrs = false,
6889         .get_rptr = gfx_v8_0_ring_get_rptr,
6890         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6891         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6892         .emit_frame_size =
6893                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6894                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6895                 5 + /* hdp_invalidate */
6896                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6897                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6898                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6899         .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6900         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6901         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6902         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6903         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6904         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6905         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6906         .test_ring = gfx_v8_0_ring_test_ring,
6907         .test_ib = gfx_v8_0_ring_test_ib,
6908         .insert_nop = amdgpu_ring_insert_nop,
6909         .pad_ib = amdgpu_ring_generic_pad_ib,
6910         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6911 };
6912
6913 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6914         .type = AMDGPU_RING_TYPE_KIQ,
6915         .align_mask = 0xff,
6916         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6917         .support_64bit_ptrs = false,
6918         .get_rptr = gfx_v8_0_ring_get_rptr,
6919         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6920         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6921         .emit_frame_size =
6922                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6923                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6924                 5 + /* hdp_invalidate */
6925                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6926                 17 + /* gfx_v8_0_ring_emit_vm_flush */
6927                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6928         .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6929         .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6930         .test_ring = gfx_v8_0_ring_test_ring,
6931         .insert_nop = amdgpu_ring_insert_nop,
6932         .pad_ib = amdgpu_ring_generic_pad_ib,
6933         .emit_rreg = gfx_v8_0_ring_emit_rreg,
6934         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6935 };
6936
6937 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6938 {
6939         int i;
6940
6941         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6942
6943         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6944                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6945
6946         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6947                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6948 }
6949
6950 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6951         .set = gfx_v8_0_set_eop_interrupt_state,
6952         .process = gfx_v8_0_eop_irq,
6953 };
6954
6955 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6956         .set = gfx_v8_0_set_priv_reg_fault_state,
6957         .process = gfx_v8_0_priv_reg_irq,
6958 };
6959
6960 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6961         .set = gfx_v8_0_set_priv_inst_fault_state,
6962         .process = gfx_v8_0_priv_inst_irq,
6963 };
6964
6965 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
6966         .set = gfx_v8_0_set_cp_ecc_int_state,
6967         .process = gfx_v8_0_cp_ecc_error_irq,
6968 };
6969
6970 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
6971         .set = gfx_v8_0_set_sq_int_state,
6972         .process = gfx_v8_0_sq_irq,
6973 };
6974
6975 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6976 {
6977         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6978         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6979
6980         adev->gfx.priv_reg_irq.num_types = 1;
6981         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6982
6983         adev->gfx.priv_inst_irq.num_types = 1;
6984         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6985
6986         adev->gfx.cp_ecc_error_irq.num_types = 1;
6987         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
6988
6989         adev->gfx.sq_irq.num_types = 1;
6990         adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
6991 }
6992
6993 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6994 {
6995         adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6996 }
6997
6998 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6999 {
7000         /* init asci gds info */
7001         adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7002         adev->gds.gws_size = 64;
7003         adev->gds.oa_size = 16;
7004         adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7005 }
7006
7007 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7008                                                  u32 bitmap)
7009 {
7010         u32 data;
7011
7012         if (!bitmap)
7013                 return;
7014
7015         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7016         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7017
7018         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7019 }
7020
7021 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7022 {
7023         u32 data, mask;
7024
7025         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7026                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7027
7028         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7029
7030         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7031 }
7032
7033 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7034 {
7035         int i, j, k, counter, active_cu_number = 0;
7036         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7037         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7038         unsigned disable_masks[4 * 2];
7039         u32 ao_cu_num;
7040
7041         memset(cu_info, 0, sizeof(*cu_info));
7042
7043         if (adev->flags & AMD_IS_APU)
7044                 ao_cu_num = 2;
7045         else
7046                 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7047
7048         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7049
7050         mutex_lock(&adev->grbm_idx_mutex);
7051         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7052                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7053                         mask = 1;
7054                         ao_bitmap = 0;
7055                         counter = 0;
7056                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7057                         if (i < 4 && j < 2)
7058                                 gfx_v8_0_set_user_cu_inactive_bitmap(
7059                                         adev, disable_masks[i * 2 + j]);
7060                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7061                         cu_info->bitmap[i][j] = bitmap;
7062
7063                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7064                                 if (bitmap & mask) {
7065                                         if (counter < ao_cu_num)
7066                                                 ao_bitmap |= mask;
7067                                         counter ++;
7068                                 }
7069                                 mask <<= 1;
7070                         }
7071                         active_cu_number += counter;
7072                         if (i < 2 && j < 2)
7073                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7074                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7075                 }
7076         }
7077         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7078         mutex_unlock(&adev->grbm_idx_mutex);
7079
7080         cu_info->number = active_cu_number;
7081         cu_info->ao_cu_mask = ao_cu_mask;
7082         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7083         cu_info->max_waves_per_simd = 10;
7084         cu_info->max_scratch_slots_per_cu = 32;
7085         cu_info->wave_front_size = 64;
7086         cu_info->lds_size = 64;
7087 }
7088
7089 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7090 {
7091         .type = AMD_IP_BLOCK_TYPE_GFX,
7092         .major = 8,
7093         .minor = 0,
7094         .rev = 0,
7095         .funcs = &gfx_v8_0_ip_funcs,
7096 };
7097
7098 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7099 {
7100         .type = AMD_IP_BLOCK_TYPE_GFX,
7101         .major = 8,
7102         .minor = 1,
7103         .rev = 0,
7104         .funcs = &gfx_v8_0_ip_funcs,
7105 };
7106
7107 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7108 {
7109         uint64_t ce_payload_addr;
7110         int cnt_ce;
7111         union {
7112                 struct vi_ce_ib_state regular;
7113                 struct vi_ce_ib_state_chained_ib chained;
7114         } ce_payload = {};
7115
7116         if (ring->adev->virt.chained_ib_support) {
7117                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7118                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7119                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7120         } else {
7121                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7122                         offsetof(struct vi_gfx_meta_data, ce_payload);
7123                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7124         }
7125
7126         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7127         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7128                                 WRITE_DATA_DST_SEL(8) |
7129                                 WR_CONFIRM) |
7130                                 WRITE_DATA_CACHE_POLICY(0));
7131         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7132         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7133         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7134 }
7135
7136 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7137 {
7138         uint64_t de_payload_addr, gds_addr, csa_addr;
7139         int cnt_de;
7140         union {
7141                 struct vi_de_ib_state regular;
7142                 struct vi_de_ib_state_chained_ib chained;
7143         } de_payload = {};
7144
7145         csa_addr = amdgpu_csa_vaddr(ring->adev);
7146         gds_addr = csa_addr + 4096;
7147         if (ring->adev->virt.chained_ib_support) {
7148                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7149                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7150                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7151                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7152         } else {
7153                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7154                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7155                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7156                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7157         }
7158
7159         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7160         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7161                                 WRITE_DATA_DST_SEL(8) |
7162                                 WR_CONFIRM) |
7163                                 WRITE_DATA_CACHE_POLICY(0));
7164         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7165         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7166         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7167 }